def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from the search result page.

    ``self.page`` is scanned twice: once for cinema links (ids get the
    prefix ``K_``) and once for video links (ids get the prefix ``V_``).
    """
    # Cinema results; the text in front of the first link is not a result.
    for chunk in re.split('href="/kinofilm/', self.page)[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = ' (' + gutils.clean(gutils.trim(chunk, '<p>', "<br />")) + ')'
        # Empty details collapse to a single blank instead of ' ()'.
        title = name + details.replace('()', '')
        if title == ' ':
            continue
        self.ids.append("K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
        self.titles.append('Kino: ' + title)

    # Video results; the medium label is derived from the link text.
    media_labels = (
        ('blu-ray-disc-kauf', ' (Bluray-Kauf)'),
        ('blu-ray-disc-leih', ' (Bluray-Verleih)'),
        ('dvd-leih', ' (DVD-Verleih)'),
        ('dvd-kauf', ' (DVD-Kauf)'),
    )
    for chunk in re.split('href="http://www.video.de/videofilm/', self.page)[1:]:
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = ' (' + gutils.clean(
            gutils.trim(gutils.before(chunk, '</li>'), '<p>', "<br />")) + ')'
        title = name + details.replace('()', '')
        if title == ' ':
            continue
        link = re.sub('[?].*', '', gutils.before(chunk, '"'))
        self.ids.append("V_" + link)
        medium = ''
        for marker, label in media_labels:
            # No early exit: a later marker overrides an earlier one.
            if marker in link:
                medium = label
        self.titles.append('Video: ' + title + medium)
def get_searches(self):
    """Collect cinema (``K_``) and video (``V_``) hits from ``self.page``.

    Every hit appends one entry to ``self.ids`` and one to ``self.titles``.
    """
    def build_title(chunk):
        """Return the link text plus the optional parenthesised details."""
        name = gutils.clean(gutils.trim(chunk, '>', '</a>'))
        details = ' (' + gutils.clean(gutils.trim(chunk, '<p>', "<br />")) + ')'
        return name + details.replace('()', '')

    # The first piece of each split is the text before the first link.
    cinema_chunks = re.split('href="/kinofilm/', self.page)[1:]
    for chunk in cinema_chunks:
        title = build_title(chunk)
        if title != ' ':
            self.ids.append("K_" + re.sub('[?].*', '', gutils.before(chunk, '"')))
            self.titles.append('Kino: ' + title)

    video_chunks = re.split('href="http://www.video.de/videofilm/', self.page)[1:]
    for chunk in video_chunks:
        title = build_title(chunk)
        if title != ' ':
            link = re.sub('[?].*', '', gutils.before(chunk, '"'))
            self.ids.append("V_" + link)
            # Checked in this order; the last matching marker decides.
            medium = ''
            if 'blu-ray-disc-kauf' in link:
                medium = ' (Bluray-Kauf)'
            if 'blu-ray-disc-leih' in link:
                medium = ' (Bluray-Verleih)'
            if 'dvd-leih' in link:
                medium = ' (DVD-Verleih)'
            if 'dvd-kauf' in link:
                medium = ' (DVD-Kauf)'
            self.titles.append('Video: ' + title + medium)
def get_cast(self):
    """Build ``self.cast`` as "actor as character" lines from the cast page.

    The section between 'Acteurs' and '</table>' is split on '<td '; the
    cells after the first piece are read in pairs (character, actor).
    """
    self.cast = ""
    section = gutils.trim(self.page_cast, 'Acteurs', '</table>')
    cells = section.split('<td ')
    # Pair each odd-indexed cell with the cell that follows it.
    for character_cell, actor_cell in zip(cells[1:-1:2], cells[2::2]):
        character = gutils.after(character_cell, '>')
        actor = gutils.after(actor_cell, '>')
        self.cast += gutils.clean(actor) + _(' as ') + gutils.clean(character) + '\n'
def get_cast(self):
    """Build ``self.cast`` as "actor as character" lines from the cast page.

    The section between "Acteurs" and "</table>" is split on "<td ".
    Starting at index 1 the cells are consumed three at a time: the first
    cell of each group is not used, the second holds the character and the
    third the actor.
    """
    self.cast = ""
    casts = gutils.trim(self.page_cast, "Acteurs", "</table>")
    parts = casts.split("<td ")
    # Stop at len(parts) - 2 so that parts[index + 2] always exists; the
    # previous bound (len(parts) - 1) raised IndexError when the last group
    # was incomplete.
    for index in range(1, len(parts) - 2, 3):
        character = gutils.after(parts[index + 1], ">")
        actor = gutils.after(parts[index + 2], ">")
        self.cast = self.cast + gutils.clean(actor) + _(" as ") + gutils.clean(character) + "\n"
def parse_movie(self):
    """Fetch and normalise every field listed in ``self.fields_to_fetch``.

    Numeric fields are reduced to digits, text fields are cleaned and
    decoded to unicode, the image is fetched, and any remaining field is
    handled generically through its ``get_<field>`` method.  Finally a
    leading "The " in the titles is moved to the end (", The").

    Errors are logged rather than propagated, and the progress dialog is
    always hidden.
    """
    try:
        fields = list(self.fields_to_fetch)  # make a copy
        self.initialize()

        if 'year' in fields:
            self.get_year()
            self.year = gutils.digits_only(self.year, 2100)
            fields.remove('year')
        if 'runtime' in fields:
            self.get_runtime()
            self.runtime = gutils.digits_only(self.runtime)
            fields.remove('runtime')
        if 'rating' in fields:
            self.get_rating()
            self.rating = gutils.digits_only(self.rating, 10)
            fields.remove('rating')
        if 'cast' in fields:
            self.get_cast()
            self.cast = gutils.clean(self.cast)
            if not isinstance(self.cast, unicode):
                self.cast = gutils.gdecode(self.cast, self.encode)
            fields.remove('cast')
        if 'plot' in fields:
            self.get_plot()
            self.plot = gutils.clean(self.plot)
            if not isinstance(self.plot, unicode):
                self.plot = gutils.gdecode(self.plot, self.encode)
            fields.remove('plot')
        if 'notes' in fields:
            self.get_notes()
            self.notes = gutils.clean(self.notes)
            if not isinstance(self.notes, unicode):
                self.notes = gutils.gdecode(self.notes, self.encode)
            fields.remove('notes')
        if 'image' in fields:
            self.get_image()
            self.fetch_picture()
            fields.remove('image')

        # Everything left is a plain text field accessed by item.
        for i in fields:
            getattr(self, "get_%s" % i)()
            self[i] = gutils.clean(self[i])
            if not isinstance(self[i], unicode):
                self[i] = gutils.gdecode(self[i], self.encode)

        if 'o_title' in self.fields_to_fetch and self.o_title is not None:
            if self.o_title[:4] == u'The ':
                self.o_title = self.o_title[4:] + u', The'
        if 'title' in self.fields_to_fetch and self.title is not None:
            if self.title[:4] == u'The ':
                self.title = self.title[4:] + u', The'
    except Exception:
        # Only real errors are logged and swallowed; KeyboardInterrupt and
        # SystemExit are no longer caught here.
        log.exception('')
    finally:
        # close the progress dialog which was opened in get_movie
        self.progress.hide()
def parse_movie(self):
    """Fetch and normalise every field listed in ``self.fields_to_fetch``.

    Exceptions propagate to the caller; the progress dialog is hidden in
    every case.
    """
    try:
        pending = list(self.fields_to_fetch)  # copy; handled names are removed
        self.initialize()

        # Numeric fields, with the extra digits_only() arguments they need.
        for name, extra in (("year", (2100,)), ("runtime", ()), ("rating", (10,))):
            if name in pending:
                getattr(self, "get_%s" % name)()
                setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
                pending.remove(name)

        # Text fields stored as attributes: clean, then decode if needed.
        for name in ("cast", "plot", "notes"):
            if name in pending:
                getattr(self, "get_%s" % name)()
                setattr(self, name, gutils.clean(getattr(self, name)))
                if not isinstance(getattr(self, name), unicode):
                    setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
                pending.remove(name)

        if "image" in pending:
            self.get_image()
            self.fetch_picture()
            pending.remove("image")

        # Remaining fields are read and written through item access.
        for name in pending:
            getattr(self, "get_%s" % name)()
            self[name] = gutils.clean(self[name])
            if not isinstance(self[name], unicode):
                self[name] = gutils.gdecode(self[name], self.encode)

        # Move a leading article to the end: "The Foo" -> "Foo, The".
        for name in ("o_title", "title"):
            if name not in self.fields_to_fetch:
                continue
            value = getattr(self, name)
            if value is not None and value[:4] == u"The ":
                setattr(self, name, value[4:] + u", The")
    finally:
        # close the progress dialog which was opened in get_movie
        self.progress.hide()
def get_notes(self):
    """Build ``self.notes`` from the 'Critica' and 'Note' sections of the page.

    Line breaks in the 'Note' section are protected with the placeholder
    ``--BR--`` while ``self.capwords`` runs, because ``string.capwords``
    removes line breaks.
    """
    self.notes = ''
    critica = gutils.clean(
        gutils.regextrim(self.page, 'Critica</font>', "(</td>|\n|Note<)").replace('<br>', '\n'))
    if critica:
        self.notes = 'Critica:\n\n' + critica + '\n\n'
    note = gutils.clean(
        gutils.regextrim(self.page, 'Note</font>', "(</td>|\n|Critica<)").replace('<br>', '--BR--'))
    if note:
        note = self.capwords(note)
        # The placeholder comes back lower-cased when the text was
        # capitalised and unchanged otherwise, so restore both spellings.
        # Previously only '--br--' was replaced and '--BR--' could leak
        # into the notes.
        note = note.replace('--BR--', '\n').replace('--br--', '\n')
        self.notes = self.notes + 'Note:\n\n' + note
def get_searches(self):
    """Append one id and one "title (year)" entry per result table row."""
    rows = re.split('<tr>', self.page)
    # rows[0] is the text before the first '<tr>'.
    for row in rows[1:]:
        title_cell = gutils.trim(row, '<td class="title">', '</td>')
        title = gutils.clean(title_cell)
        link = gutils.trim(title_cell, 'href="', '"')
        # The id is the last path component of the link.
        movie_id = link[link.rfind('/') + 1:]
        year = gutils.trim(row, '<td class="year">', '</td>')
        self.ids.append(movie_id)
        self.titles.append(title + ' (' + gutils.clean(year) + ')')
def get_cast(self):
    """Build ``self.cast`` with one "actor[ as role]" line per people link."""
    self.cast = ''
    section = gutils.trim(self.page, '>Cast<', '</TABLE>')
    for chunk in re.split('(href|HREF)="/db/people', section):
        actor = gutils.clean(gutils.trim(chunk, '>', '<'))
        if not actor:
            continue
        role = gutils.clean(gutils.trim(chunk, '>...', '</TR>'))
        if role:
            line = actor + _(' as ') + role
        else:
            line = actor
        self.cast = self.cast + line + '\n'
def get_notes(self):
    """Build ``self.notes`` from the 'Type' and 'Episodes' fields of the page."""
    self.notes = ''
    # (start marker on the page, line template for the notes)
    entries = (
        ('"field">Type', "Type: %s\n"),
        ('"field">Episodes', "Episodes: %s\n"),
    )
    for marker, template in entries:
        value = gutils.clean(gutils.trim(self.page, marker, '</td>'))
        if value != '':
            self.notes += template % value
def get_searches(self):
    """Append an id and a "title[ (year)]" entry for every film link found.

    Nothing is collected when the page starts directly with a film link,
    i.e. when the first split piece is empty.
    """
    chunks = self.page.split('<a href=\'/film/fichefilm_gen_cfilm=')
    if chunks[0] == '':
        return
    for chunk in chunks[1:]:
        title = gutils.clean(gutils.convert_entities(gutils.trim(chunk, '>', '</a>')))
        year = gutils.clean(gutils.trim(chunk, '<span class="fs11">', '<br'))
        if not title:
            continue
        self.ids.append(gutils.before(chunk, '.'))
        if year:
            self.titles.append(title + ' (' + year + ')')
        else:
            self.titles.append(title)
def parse_movie(self):
    """Fetch and normalise every field listed in ``self.fields_to_fetch``."""
    from copy import deepcopy

    pending = deepcopy(self.fields_to_fetch)  # handled names are removed
    self.initialize()

    # Numeric fields, with the extra digits_only() arguments they need.
    for name, extra in (("year", (2100,)), ("runtime", ()), ("rating", (10,))):
        if name in pending:
            getattr(self, "get_%s" % name)()
            setattr(self, name, gutils.digits_only(getattr(self, name), *extra))
            pending.remove(name)

    # Text fields stored as attributes: clean, then decode.
    for name in ("cast", "plot", "notes"):
        if name in pending:
            getattr(self, "get_%s" % name)()
            setattr(self, name, gutils.clean(getattr(self, name)))
            setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
            pending.remove(name)

    if "image" in pending:
        self.get_image()
        self.fetch_picture()
        pending.remove("image")

    # Remaining fields are read and written through item access.
    for name in pending:
        getattr(self, "get_%s" % name)()
        self[name] = gutils.clean(self[name])
        self[name] = gutils.gdecode(self[name], self.encode)

    # Move a leading article to the end: "The Foo" -> "Foo, The".
    for name in ("o_title", "title"):
        if name not in self.fields_to_fetch:
            continue
        value = getattr(self, name)
        if value is not None and value[:4] == "The ":
            setattr(self, name, value[4:] + ", The")
def get_cast(self):
    """Build ``self.cast`` with one "actor[ as character]" line per person link.

    The character is taken from the 'Rôle :' text of the link's own chunk
    or, failing that, from the '<td>' cell of the preceding chunk.
    """
    self.cast = ""
    section = gutils.trim(self.page_cast, 'Acteurs, rôles, personnages', '<h2>')
    chunks = section.split('href="/personne/fichepersonne_gen_cpersonne=')
    # Walk over each chunk together with the one before it.
    for previous, chunk in zip(chunks, chunks[1:]):
        character = gutils.clean(gutils.trim(chunk, 'Rôle :', '<'))
        if not character:
            character = gutils.clean(gutils.trim(previous, '<td>', '</td>'))
        actor = gutils.clean(gutils.trim(chunk, '>', '<'))
        if not actor:
            continue
        if character:
            self.cast = self.cast + actor + _(' as ') + character + '\n'
        else:
            self.cast = self.cast + actor + '\n'
def parse_movie(self):
    """Fetch and normalise every field listed in ``self.fields_to_fetch``."""
    from copy import deepcopy

    todo = deepcopy(self.fields_to_fetch)  # handled names are removed
    self.initialize()

    def fetch(name):
        """Call the ``get_<name>`` method of this object."""
        getattr(self, "get_%s" % name)()

    # Numeric fields are reduced to digits.
    if 'year' in todo:
        fetch('year')
        self.year = gutils.digits_only(self.year, 2100)
        todo.remove('year')
    if 'runtime' in todo:
        fetch('runtime')
        self.runtime = gutils.digits_only(self.runtime)
        todo.remove('runtime')
    if 'rating' in todo:
        fetch('rating')
        self.rating = gutils.digits_only(self.rating, 10)
        todo.remove('rating')

    # Text fields stored as attributes: clean, then decode.
    for name in ('cast', 'plot', 'notes'):
        if name in todo:
            fetch(name)
            setattr(self, name, gutils.clean(getattr(self, name)))
            setattr(self, name, gutils.gdecode(getattr(self, name), self.encode))
            todo.remove(name)

    if 'image' in todo:
        fetch('image')
        self.fetch_picture()
        todo.remove('image')

    # Remaining fields are read and written through item access.
    for name in todo:
        fetch(name)
        self[name] = gutils.clean(self[name])
        self[name] = gutils.gdecode(self[name], self.encode)

    # Move a leading article to the end: "The Foo" -> "Foo, The".
    if 'o_title' in self.fields_to_fetch and self.o_title is not None:
        if self.o_title.startswith('The '):
            self.o_title = self.o_title[4:] + ', The'
    if 'title' in self.fields_to_fetch and self.title is not None:
        if self.title.startswith('The '):
            self.title = self.title[4:] + ', The'
def search(self, parent_window):
    """Run the film and the video search and return all pages concatenated.

    The first film page is fetched with whatever ``self.url`` holds on
    entry; the video search and all follow-up pages use kino.de search
    URLs built here.  At most five result pages are read per category.
    The combined text is stored in ``self.page`` and returned.
    """
    base_url = "http://www.kino.de/search.php?mode=megaSearch&searchCategory="

    def with_following_pages(category):
        """Return ``self.page`` plus result pages 2..N of *category* (N <= 5)."""
        combined = self.page
        page_count_text = gutils.clean(gutils.trim(self.page, ">von", "</a>"))
        try:
            page_count = int(page_count_text)
        except (TypeError, ValueError):
            # No usable page counter found: treat the result as one page.
            page_count = 1
        for page_number in range(2, min(page_count, 5) + 1):
            self.url = base_url + category + "&page=" + str(page_number) + "&inputSearch="
            self.open_search(parent_window)
            combined = combined + self.page
        return combined

    # Cinema films first.
    self.open_search(parent_window)
    movie_pages = with_following_pages("film")

    # Then look for DVD and VHS.
    self.url = base_url + "video&inputSearch="
    self.open_search(parent_window)
    video_pages = with_following_pages("video")

    self.page = movie_pages + video_pages
    return self.page
def get_rating(self):
    """Store the user rating found before '/10' as a float string.

    ``self.rating`` becomes '' when the extracted text is not a number.
    """
    self.rating = gutils.trim(self.page, '<h5>Nutzer-Bewertung:</h5>', '/10')
    if self.rating:
        try:
            self.rating = str(float(gutils.clean(self.rating)))
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else
            # should surface instead of being silently swallowed.
            self.rating = ''
def get_cast(self):
    """Build ``self.cast`` from the '(Darsteller)' section, one entry per line."""
    cast = gutils.regextrim(self.page, '[(]Darsteller[)]', '(</td>|<br><span[^>]+>)')
    cast = gutils.clean(cast)
    # Translate the German role separator, then drop tabs and line breaks.
    cast = cast.replace(' als ', _(' as '))
    cast = re.sub('( \t|\t|\r|\n)', '', cast)
    # Entries are comma separated on the page.
    self.cast = cast.replace(',', '\n')
def get_notes(self):
    """Build ``self.notes`` from language, sound mix, color and taglines."""
    def one_line(start_pattern):
        """Return the tag-free, whitespace-normalised text of one page field."""
        text = gutils.strip_tags(gutils.regextrim(self.page, start_pattern, '</div>'))
        text = re.sub('[\n]+', '', text)
        text = re.sub('[ ]+', ' ', text)
        return text.strip()

    self.notes = ''
    language = one_line('Language:<[^>]+>')
    color = one_line('Color:<[^>]+>')
    sound = one_line('Sound Mix:<[^>]+>')

    # Taglines come from the separate tagline page, one '<div>' per entry.
    tagline_section = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    tagline = ''
    for entry in re.split('<div[^>]+class="soda[^>]*>', tagline_section)[1:]:
        entry = gutils.clean(gutils.before(entry, '</div>'))
        if entry:
            tagline = tagline + entry + '\n'

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_notes(self):
    """Build ``self.notes`` from language, sound mix, color and taglines."""
    self.notes = ''

    # Extract the three single-line fields in page order.
    values = {}
    for key, start_pattern in (('language', 'Language:<[^>]+>'),
                               ('color', 'Color:<[^>]+>'),
                               ('sound', 'Sound Mix:<[^>]+>')):
        text = gutils.regextrim(self.page, start_pattern, '</div>')
        text = gutils.strip_tags(text)
        text = re.sub('[\n]+', '', text)
        text = re.sub('[ ]+', ' ', text)
        values[key] = text.strip()

    # Taglines live on their own page; each one sits in a "soda" div.
    raw_taglines = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
    pieces = re.split('<div[^>]+class="soda[^>]*>', raw_taglines)
    tagline = ''
    for piece in pieces[1:]:
        entry = gutils.clean(gutils.before(piece, '</div>'))
        if entry:
            tagline = tagline + entry + '\n'

    if values['language']:
        self.notes = "%s: %s\n" % (_('Language'), values['language'])
    if values['sound']:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), values['sound'])
    if values['color']:
        self.notes += "%s: %s\n" % (_('Color'), values['color'])
    if tagline:
        self.notes += "%s: %s\n" % ('Tagline', tagline)
def get_director(self):
    """Store the director names from the credits page as a comma separated list."""
    row = gutils.trim(self.creditspage, u"reżyseria: <", "</tr>")
    names = gutils.after(row, ">").replace("<br />", ", ")
    names = gutils.clean(names)
    # Drop a single trailing comma left over from the last '<br />'.
    self.director = names[:-1] if names.endswith(",") else names
def get_screenplay(self):
    """Store the screenplay authors from the credits page as a comma separated list."""
    row = gutils.trim(self.creditspage, u"scenariusz: <", "</tr>")
    names = gutils.after(row, ">").replace("<br />", ", ")
    names = gutils.clean(names)
    # Drop a single trailing comma left over from the last '<br />'.
    self.screenplay = names[:-1] if names.endswith(",") else names
def get_plot(self):
    """Extract the plot text into ``self.plot``.

    Sources are tried in order: the 'yui-content' block of ``self.page``,
    the 'Filmhandlung & Hintergrund' section of ``self.page`` and finally
    the 'yui-content' block of ``self.videopage``.  A non-empty result is
    stripped of scripts, navigation link texts, photo credit lines and
    blank lines.
    """
    plot = gutils.trim(self.page, '<div class="yui-content">', '<div class="footer">')
    if not plot:
        # kino page
        section = gutils.trim(self.page, 'Filmhandlung & Hintergrund', '</div>')
        plot = gutils.after(section, '</h2>')
    if not plot and self.videopage:
        plot = gutils.trim(self.videopage, '<div class="yui-content">', '<div class="footer">')
    if plot:
        # video page
        plot = re.sub('<script type="text/javascript">[^<]+</script>', '', plot)
        # Remove the visible text of gallery/navigation links.
        for link_text in ('>Großansicht</a>', '>Schließen</a>', '>zurück </a>',
                          '>1</a>', '> weiter</a>'):
            plot = plot.replace(link_text, '>')
        plot = plot.replace('</h4>', '\n')
        plot = gutils.clean(plot)
        # Line-wise cleanup: photo credits, whitespace-only or '//' lines,
        # then lines consisting of line breaks only.
        photo_credit = re.compile(r'^[^(]+[(]Foto[:][^)]+[)][ ]*$', re.MULTILINE)
        plot = photo_credit.sub('', plot)
        blank_or_slashes = re.compile(r"(^\s+$|^\s*//\s*$)", re.MULTILINE)
        plot = blank_or_slashes.sub('', plot)
        only_newlines = re.compile("^[\n]+$", re.MULTILINE)
        plot = only_newlines.sub("\n", plot)
    self.plot = plot
def get_searches(self):
    """Fill ``self.ids`` and ``self.titles`` from ``self.page``.

    A page shorter than 20 characters is treated as a single direct hit
    (the original comment describes this as an immediate redirection to
    the movie page); otherwise every film link on the page is collected.
    """
    if not self.page:
        return
    if len(self.page) < 20:
        # Direct hit: the page content itself is used as the id.
        self.number_results = 1
        self.ids.append(self.page)
        self.titles.append(self.url)
        return

    # Multiple matches: each piece ends where a film link ends.
    pieces = self.page.split('</a></b>')
    if pieces[0] == '':
        return
    for piece, following in zip(pieces, pieces[1:]):
        film_id = gutils.trim(piece, '<b><a href="/es/film', '.html')
        if not film_id:
            continue
        self.ids.append(film_id)
        link_text = gutils.clean(
            gutils.after(piece, '<b><a href="/es/film')).replace("\n", "")
        name = gutils.strip_tags(gutils.convert_entities(gutils.after(link_text, '>')))
        # The text right after the link (start of the next piece) is appended.
        suffix = gutils.before(following, '<').strip()
        self.titles.append(name + ' ' + suffix)
def get_notes(self):
    """Build ``self.notes`` from language, sound, color and release date."""
    def squeeze(text):
        """Remove line breaks and collapse runs of blanks."""
        return re.sub('[ ]+', ' ', re.sub('[\n]+', '', text))

    def field(heading):
        """Return the tag-free text of the '<h5>' field, right-stripped."""
        raw = gutils.strip_tags(gutils.trim(self.page, heading, '</div>'))
        return squeeze(raw).rstrip()

    self.notes = ''
    language = field('<h5>Lingua:</h5>')
    color = field('<h5>Colore:</h5>')
    sound = field('<h5>Sonoro:</h5>')
    # The release date ends at the "more" link and is cleaned, not tag-stripped.
    date = gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')
    date = gutils.clean(squeeze(date))

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if date:
        self.notes += "%s: %s\n" % (_('Data di uscita'), date)
def get_searches(self):
    """Collect DVD and Blu-ray hits from ``self.page``.

    The split pattern contains a capturing group, so the result alternates
    between the captured link prefix and the text that follows it.
    """
    pieces = re.split(' <a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)', self.page)
    # pieces[0] is the text before the first link; after that come
    # (prefix, chunk) pairs.
    for prefix, chunk in zip(pieces[1::2], pieces[2::2]):
        if chunk is None:
            continue
        if prefix == '/datenbank/medien/blu-ray/':
            medium = 'Blu-Ray'
            id_prefix = 'blu-ray/'
        else:
            medium = 'DVD'
            id_prefix = 'dvd/'
        self.ids.append(id_prefix + gutils.before(chunk, '"'))

        name = gutils.trim(chunk, '>', '</a>')
        details = gutils.regextrim(chunk, '<div [^>]*>', '</div>')
        details = details.replace('<br>', ' - ')
        # NOTE(review): the replaced character is copied as it appears in
        # the source; confirm whether it is a plain or a non-breaking space.
        details = details.replace(' ', '')
        details = re.sub('[ \t\n]+', ' ', details)
        self.titles.append(name + gutils.clean('(' + medium + ' - ' + details + ')'))
def get_genre(self):
    """Store the genres listed after 'Genre(s):' as a comma separated list."""
    raw = gutils.trim(self.page, "Genre(s):", "</table>")
    # Line breaks and slashes both separate genres.
    without_tags = gutils.strip_tags(raw.replace("<br>", ", "))
    cleaned = gutils.clean(without_tags.replace("/", ", "))
    # The last character is always dropped (presumably a trailing
    # separator - TODO confirm against real pages).
    self.genre = cleaned[:-1]
def get_genre(self):
    """Store the 'GÉNERO' table cell as a '. ' separated genre list."""
    self.genre = ''
    cell = gutils.after(gutils.trim(self.page, '<th>GÉNERO</th>', '</tr>'), '<td>')
    if not cell:
        return
    genres = gutils.clean(cell.replace(' | ', '. '))
    # Normalise the spacing after every full stop.
    self.genre = re.sub('[.][ \t]+', '. ', genres)
def get_cameraman(self):
    """Store the cinematographer names from the credits page as a comma separated list."""
    row = gutils.regextrim(self.creditspage, u"zdjęcia: <", "(</tr>|<tr>)")
    names = gutils.after(row, ">").replace("<br />", ", ")
    names = gutils.clean(names)
    # Drop a single trailing comma left over from the last '<br />'.
    self.cameraman = names[:-1] if names.endswith(",") else names
def get_notes(self):
    """Build ``self.notes`` from language, sound, color and release date."""
    self.notes = ''

    # The three '<h5>' fields share the same extraction steps.
    extracted = []
    for heading in ('<h5>Lingua:</h5>', '<h5>Colore:</h5>', '<h5>Sonoro:</h5>'):
        text = gutils.trim(self.page, heading, '</div>')
        text = gutils.strip_tags(text)
        text = re.sub('[\n]+', '', text)
        text = re.sub('[ ]+', ' ', text)
        extracted.append(text.rstrip())
    language, color, sound = extracted

    # The release date ends at the "more" link and is cleaned, not tag-stripped.
    date = gutils.trim(self.page, '<h5>Data di uscita:</h5>', '<a class="tn15more inline"')
    date = re.sub('[\n]+', '', date)
    date = re.sub('[ ]+', ' ', date)
    date = gutils.clean(date)

    if language:
        self.notes = "%s: %s\n" % (_('Language'), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_('<b>Audio</b>')), sound)
    if color:
        self.notes += "%s: %s\n" % (_('Color'), color)
    if date:
        self.notes += "%s: %s\n" % (_('Data di uscita'), date)
def get_rating(self):
    """Store the rounded rating from the '<span class="rating' element.

    ``self.rating`` becomes '' when the extracted text is not a number.
    """
    self.rating = gutils.clean(
        gutils.after(gutils.trim(self.page, '<span class="rating', '</a>'), '>'))
    if self.rating:
        try:
            self.rating = str(round(float(self.rating)))
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else
            # should surface instead of being silently swallowed.
            self.rating = ''
def get_rating(self):
    """Store the user rating found before "/10" as a float string.

    ``self.rating`` becomes "" when the extracted text is not a number.
    """
    self.rating = gutils.trim(self.page, "<b>Nutzer-Bewertung:</b>", "/10")
    if self.rating:
        try:
            self.rating = str(float(gutils.clean(self.rating)))
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else
            # should surface instead of being silently swallowed.
            self.rating = ""
def get_notes(self):
    """Build ``self.notes`` from language, sound mix, color and taglines."""
    def page_field(start_pattern):
        """Return one page field without tags, line breaks or repeated blanks."""
        raw = gutils.regextrim(self.page, start_pattern, "</div>")
        flat = re.sub("[\n]+", "", gutils.strip_tags(raw))
        return re.sub("[ ]+", " ", flat).strip()

    self.notes = ""
    language = page_field("Language:<[^>]+>")
    color = page_field("Color:<[^>]+>")
    sound = page_field("Sound Mix:<[^>]+>")

    # Taglines come from the separate tagline page, one "soda" div each.
    tagline_html = gutils.regextrim(self.tagl_page, ">Taglines", ">See also")
    tagline = ""
    for block in re.split('<div[^>]+class="soda[^>]*>', tagline_html)[1:]:
        text = gutils.clean(gutils.before(block, "</div>"))
        if text:
            tagline = tagline + text + "\n"

    if language:
        self.notes = "%s: %s\n" % (_("Language"), language)
    if sound:
        self.notes += "%s: %s\n" % (gutils.strip_tags(_("<b>Audio</b>")), sound)
    if color:
        self.notes += "%s: %s\n" % (_("Color"), color)
    if tagline:
        self.notes += "%s: %s\n" % ("Tagline", tagline)
def get_cast(self):
    """Build ``self.cast`` from the '(Darsteller)' section, one entry per line."""
    cast = gutils.regextrim(self.page, '[(]Darsteller[)]', '(<[pP]>|<br><span[^>]+>)')
    cast = gutils.clean(cast)
    # Translate the German role separator, then drop tabs and line breaks.
    cast = cast.replace(' als ', _(' as '))
    cast = re.sub('( \t|\t|\r|\n)', '', cast)
    # ', ' separates entries; any comma left afterwards is removed.
    cast = cast.replace(', ', '\n')
    self.cast = cast.replace(',', '')
def get_screenplay(self):
    """Store the name from the crew entry that mentions 'Screenwriter'.

    When several entries match, the last one is kept.
    """
    self.screenplay = ''
    crew = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
    for entry in crew.split('<dt>'):
        # A match at the very start of the entry (position 0) is ignored.
        if entry.find('Screenwriter') > 0:
            self.screenplay = gutils.clean(gutils.before(entry, '</a>'))
def get_cameraman(self):
    """Store the name from the crew entry that mentions 'Cinematographer'.

    When several entries match, the last one is kept.
    """
    self.cameraman = ''
    crew = gutils.trim(self.page_cast, '<h2>crew</h2>', '</dl>')
    for entry in crew.split('<dt>'):
        # A match at the very start of the entry (position 0) is ignored.
        if entry.find('Cinematographer') > 0:
            self.cameraman = gutils.clean(gutils.before(entry, '</a>'))
def capwords(self, name):
    """Return *name* with capitalised words if its cleaned form is all upper case.

    Any other text is returned unchanged.
    """
    # Does not work with accented letters => discarded in titles
    cleaned = gutils.clean(name)
    if cleaned != cleaned.upper():
        return name
    return string.capwords(name)
def get_director(self):
    """Store the director names from the credits page as a comma separated list."""
    row = gutils.trim(self.creditspage, u'reżyseria: <', '</tr>')
    names = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
    if names.endswith(','):
        # Drop the trailing comma left over from the last '<br />'.
        names = names[:-1]
    self.director = names
def get_runtime(self):
    """Store the runtime in minutes, taken from the 'Durée : ' field.

    Values written as hours and minutes ("1H30") are converted to minutes;
    otherwise the text in front of ' mn' is used as it is.
    """
    import re  # local import: the block does not rely on a file-level one

    self.runtime = gutils.clean(gutils.trim(self.page, u'Durée : ', '</span>'))
    if self.runtime:
        # Hours are required, minutes are optional, and anything after the
        # minutes is ignored.  The previous int() calls raised ValueError
        # for values such as "2H" or "1H30 mn".
        hours_minutes = re.match(r'\s*(\d+)\s*H\s*(\d*)', self.runtime)
        if hours_minutes:
            hours, minutes = hours_minutes.groups()
            self.runtime = str(int(hours) * 60 + int(minutes or 0))
        else:
            self.runtime = gutils.before(self.runtime, ' mn')
def get_director(self):
    """Store the director names as a comma separated list.

    The 'Regie' section is tried first, then the 'Regisseur:' section.
    """
    director = gutils.trim(self.page, '<h5>Regie</h5>', '<br/>')
    if director == '':
        director = gutils.trim(self.page, '<h5>Regisseur:</h5>', '</div>')
    director = self.__before_more(director)
    director = gutils.clean(director.replace('<br/>', ', '))
    # Remove a comma at the very end of the list.
    self.director = re.sub(',$', '', director)
def get_rating(self):
    """Store the rounded average rating from the 'movie-rat-avg' element.

    A decimal comma is accepted.  ``self.rating`` becomes '' when the
    extracted text is not a number.
    """
    self.rating = gutils.after(
        gutils.trim(self.page, 'id="movie-rat-avg"', '</div>'), '>')
    if self.rating:
        try:
            self.rating = str(
                round(float(gutils.clean(self.rating.replace(',', '.')))))
        except (TypeError, ValueError):
            # Non-numeric text used to raise out of this method; treat it
            # as "no rating" instead.
            self.rating = ''
def get_cameraman(self):
    """Store the cinematographer names from the credits page as a comma separated list."""
    row = gutils.regextrim(self.creditspage, u'zdjęcia: <', '(</tr>|<tr>)')
    names = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
    if names.endswith(','):
        # Drop the trailing comma left over from the last '<br />'.
        names = names[:-1]
    self.cameraman = names
def get_cast(self):
    """Build ``self.cast`` from the 'Elenco' cell, one entry per line."""
    cast = gutils.trim(self.page, u'<b>Elenco:</b>', u'</td>')
    # Applied in this order: line breaks, separators, tabs, leading blanks.
    for old, new in ((u'<br>', u'\n'), (u', ', u''), (u'\t', u''), (u'\n ', u'\n')):
        cast = cast.replace(old, new)
    cast = gutils.clean(cast)
    # Collapse blank lines and trim blanks around the line breaks.
    self.cast = re.sub('[ \t]*[\n]+[ \t]*', '\n', cast)
def get_rating(self):
    """Find the film's rating. From 0 to 10.

    The text after 'IMDB: ' is rounded to a whole number; ``self.rating``
    is set to 0 when that text is not a number.
    """
    rating_text = gutils.clean(gutils.trim(self.page, u'IMDB: ', u'</span>'))
    try:
        self.rating = round(float(rating_text), 0)
    except (TypeError, ValueError):
        # Only conversion failures mean "no rating"; the previous
        # "except Exception, e" also hid unrelated errors and bound an
        # unused name.
        self.rating = 0
def get_cameraman(self):
    """Store the names from the 'Kamera' table of the cast page, comma separated."""
    names = gutils.trim(self.cast_page, '>Kamera</a>', '</table>')
    # Remove role annotations and turn link ends into separators.
    for old, new in (('(Kamera)', ''), ('(nicht im Abspann)', ''), ('</a>', ', ')):
        names = names.replace(old, new)
    names = gutils.clean(names)
    # Drop the trailing separator, then collapse repeated blanks.
    names = re.sub(',[ \t]*$', '', names)
    self.cameraman = re.sub('[ ]+', ' ', names)
def get_o_title(self):
    """Store the original title, falling back to the 'film-titel' heading."""
    labelled = gutils.regextrim(self.page, '<b>Originaltitel:', '(</p>|<b>)')
    self.o_title = string.capwords(gutils.clean(labelled))
    if not self.o_title:
        # No explicit original title: use the heading text as it is.
        heading = gutils.trim(self.page, 'class=\'film-titel\'', '</h1>')
        self.o_title = gutils.after(heading, '>')
def get_screenplay(self):
    """Store the screenplay authors from the credits page as a comma separated list."""
    row = gutils.trim(self.creditspage, u'scenariusz: <', '</tr>')
    names = gutils.clean(gutils.after(row, '>').replace('<br />', ', '))
    if names.endswith(','):
        # Drop the trailing comma left over from the last '<br />'.
        names = names[:-1]
    self.screenplay = names
def get_notes(self):
    """Build ``self.notes`` from the technical details listed on the page.

    Each detail that is present adds its heading, its text and a blank line.
    """
    # (marker in front of the value on the page, heading used in the notes)
    sections = (
        ("<strong>Sprachen:</strong>", "Sprachen:\n"),
        ("<strong>Untertitel:</strong>", "Untertitel:\n"),
        ("<strong>Tonformat:</strong>", "Tonformat:\n"),
        ("<strong>Bildformat:</strong>", "Bildformat:\n"),
        ("<strong>EAN</strong>", "EAN:\n"),
    )
    self.notes = ""
    for marker, heading in sections:
        text = gutils.clean(gutils.trim(self.page, marker, "</p>"))
        if text != "":
            self.notes = self.notes + heading + text + "\n\n"
def get_cameraman(self):
    """Store the names from the 'Fotografia' table section, comma separated."""
    # Find the cameraman: every further table row is another name.
    names = gutils.trim(self.page, 'Fotografia</font></td></tr><tr>', '<td colspan="2"')
    names = gutils.clean(names.replace('<tr>', ', '))
    # Beautification: tidy the blanks and drop the trailing separator.
    names = re.sub('[ ]+', ' ', names.replace(' ,', ','))
    self.cameraman = re.sub('[,][ ]*$', '', names)
def get_screenplay(self):
    """Store the names from the 'Sceneggiatura' table section, comma separated."""
    # Find the screenplay: every further table row is another name.
    names = gutils.trim(self.page, 'Sceneggiatura</font></td></tr><tr>', '<td colspan="2"')
    names = gutils.clean(names.replace('<tr>', ', '))
    # Beautification: tidy the blanks and drop the trailing separator.
    names = re.sub('[ ]+', ' ', names.replace(' ,', ','))
    self.screenplay = re.sub('[,][ ]*$', '', names)
def get_searches(self):
    """Collect ids and titles of all film links and record how many were found.

    The pattern captures three groups per hit: the numeric film id, the
    title markup and the text of the span that follows it.
    """
    hits = re.findall(
        r"""/film/film.asp\?fi=(\d+)"[^>]*>.*?searchTitle\s*textB">(.*?)</span>.*?"> (.*?)</span>""",
        self.page)
    self.number_results = len(hits)
    for film_id, title_html, extra in hits:
        self.ids.append(film_id)
        self.titles.append(gutils.clean(title_html) + ' ' + extra)