Ejemplo n.º 1
0
 def get_notes(self):
     """Assemble ``self.notes`` from language, audio, colour and taglines.

     The three info rows are looked up in ``self.page``; the taglines come
     from ``self.tagl_page``.  Only non-empty values produce a line.  The
     'Tagline' label is the only one that is not passed through ``_()``.
     """
     self.notes = ''

     def info_row(start_pattern):
         # Shared clean-up: drop tags and newlines, collapse runs of spaces.
         text = gutils.regextrim(self.page, start_pattern, '</div>')
         text = gutils.strip_tags(text)
         text = re.sub('[\n]+', '', text)
         text = re.sub('[ ]+', ' ', text)
         return text.strip()

     language = info_row('Language:<[^>]+>')
     color = info_row('Color:<[^>]+>')
     sound = info_row('Sound Mix:<[^>]+>')

     tagline_block = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
     tagline = ''
     # The piece before the first "soda" div is not a tagline, so skip it.
     for chunk in re.split('<div[^>]+class="soda[^>]*>', tagline_block)[1:]:
         text = gutils.clean(gutils.before(chunk, '</div>'))
         if text:
             tagline = tagline + text + '\n'

     if language:
         self.notes = "%s: %s\n" % (_('Language'), language)
     if sound:
         audio_label = gutils.strip_tags(_('<b>Audio</b>'))
         self.notes += "%s: %s\n" % (audio_label, sound)
     if color:
         self.notes += "%s: %s\n" % (_('Color'), color)
     if tagline:
         self.notes += "%s: %s\n" % ('Tagline', tagline)
 def get_notes(self):
     """Assemble ``self.notes`` from language, audio, colour and taglines.

     The three info rows are looked up in ``self.page``; the taglines come
     from ``self.tagl_page``.  Only non-empty values produce a line.  The
     "Tagline" label is the only one that is not passed through ``_()``.
     """
     self.notes = ""

     def info_row(start_pattern):
         # Shared clean-up: drop tags and newlines, collapse runs of spaces.
         text = gutils.regextrim(self.page, start_pattern, "</div>")
         text = gutils.strip_tags(text)
         text = re.sub("[\n]+", "", text)
         text = re.sub("[ ]+", " ", text)
         return text.strip()

     language = info_row("Language:<[^>]+>")
     color = info_row("Color:<[^>]+>")
     sound = info_row("Sound Mix:<[^>]+>")

     tagline_block = gutils.regextrim(self.tagl_page, ">Taglines", ">See also")
     tagline = ""
     # The piece before the first "soda" div is not a tagline, so skip it.
     for chunk in re.split('<div[^>]+class="soda[^>]*>', tagline_block)[1:]:
         text = gutils.clean(gutils.before(chunk, "</div>"))
         if text:
             tagline = tagline + text + "\n"

     if language:
         self.notes = "%s: %s\n" % (_("Language"), language)
     if sound:
         audio_label = gutils.strip_tags(_("<b>Audio</b>"))
         self.notes += "%s: %s\n" % (audio_label, sound)
     if color:
         self.notes += "%s: %s\n" % (_("Color"), color)
     if tagline:
         self.notes += "%s: %s\n" % ("Tagline", tagline)
Ejemplo n.º 3
0
 def get_image(self):
     """Set ``self.image_url`` from the first source that yields a URL.

     Order: the ``"picture":`` value in ``self.page``; then either a
     ``flbilder`` link in the cover area (video page) or an
     ``images.kino.de`` link (kino page); finally the cover area of
     ``self.videopage`` when that page is available.
     """
     cover_link = '(http[:][/][/][^/]+[/]flbilder[/][^"\']+)'
     kino_prefix = 'http://images.kino.de/s/'

     picture = gutils.trim(self.page, '"picture":', ',')
     self.image_url = picture.replace('"', '').replace('\\', '')
     if self.image_url:
         return

     cover = gutils.regextrim(self.page, '<div class="cover-area">', '</div>')
     if cover:
         # video page
         found = re.search(cover_link, cover)
         if found:
             self.image_url = found.group(1)
     else:
         # kino page
         head = gutils.before(self.page, '<span style="line-height: 15px;">')
         if head:
             pieces = re.split(kino_prefix, head)
             # When two or more links exist the second one is used.
             if len(pieces) > 2:
                 self.image_url = kino_prefix + gutils.before(pieces[2], '"')
             elif len(pieces) > 1:
                 self.image_url = kino_prefix + gutils.before(pieces[1], '"')

     if self.image_url or not self.videopage:
         return
     cover = gutils.regextrim(self.videopage, '<div class="cover-area">', '</div>')
     if cover:
         # video page
         found = re.search(cover_link, cover)
         if found:
             self.image_url = found.group(1)
Ejemplo n.º 4
0
 def get_o_title(self):
     """Set ``self.o_title``, trying three lookups in ``self.page`` in turn.

     Order: the ``title-extra`` element, the ``<h1>`` text up to its
     ``<span``, then the ``<title>`` text with everything from `` (`` on
     removed.  Double quotes are stripped from the result.
     """
     title = gutils.regextrim(self.page, 'class="title-extra"[^>]*>', '<')
     if not title:
         title = gutils.regextrim(self.page, '<h1>', '([ ]|[&][#][0-9]+[;])<span')
     if not title:
         page_title = gutils.trim(self.page, '<title>', '</title>')
         title = re.sub(' [(].*', '', page_title)
     self.o_title = re.sub('"', '', title)
Ejemplo n.º 5
0
 def get_o_title(self):
     """Set ``self.o_title``, trying three lookups in ``self.page`` in turn.

     Order: the ``title-extra`` element, the ``<h1>`` text up to its
     ``<span``, then the ``<title>`` text with everything from `` (`` on
     removed.  Double quotes are stripped from the result.
     """
     title = gutils.regextrim(self.page, 'class="title-extra"[^>]*>', '<')
     if not title:
         title = gutils.regextrim(self.page, '<h1>', '([ ]|[&][#][0-9]+[;])<span')
     if not title:
         page_title = gutils.trim(self.page, '<title>', '</title>')
         title = re.sub(' [(].*', '', page_title)
     self.o_title = re.sub('"', '', title)
Ejemplo n.º 6
0
 def get_notes(self):
     """Assemble ``self.notes`` from language, audio, colour and taglines.

     The three info rows are looked up in ``self.page``; the taglines come
     from ``self.tagl_page``.  Only non-empty values produce a line.  The
     'Tagline' label is the only one that is not passed through ``_()``.
     """
     self.notes = ''

     def info_row(start_pattern):
         # Shared clean-up: drop tags and newlines, collapse runs of spaces.
         text = gutils.regextrim(self.page, start_pattern, '</div>')
         text = gutils.strip_tags(text)
         text = re.sub('[\n]+', '', text)
         text = re.sub('[ ]+', ' ', text)
         return text.strip()

     language = info_row('Language:<[^>]+>')
     color = info_row('Color:<[^>]+>')
     sound = info_row('Sound Mix:<[^>]+>')

     tagline_block = gutils.regextrim(self.tagl_page, '>Taglines', '>See also')
     tagline = ''
     # The piece before the first "soda" div is not a tagline, so skip it.
     for chunk in re.split('<div[^>]+class="soda[^>]*>', tagline_block)[1:]:
         text = gutils.clean(gutils.before(chunk, '</div>'))
         if text:
             tagline = tagline + text + '\n'

     if language:
         self.notes = "%s: %s\n" % (_('Language'), language)
     if sound:
         audio_label = gutils.strip_tags(_('<b>Audio</b>'))
         self.notes += "%s: %s\n" % (audio_label, sound)
     if color:
         self.notes += "%s: %s\n" % (_('Color'), color)
     if tagline:
         self.notes += "%s: %s\n" % ('Tagline', tagline)
 def get_o_title(self):
     """Set ``self.o_title``, trying three lookups in ``self.page`` in turn.

     Order: the ``title-extra`` element, the ``<h1>`` text up to its
     ``<span``, then the ``<title>`` text with everything from `` (`` on
     removed.  Double quotes are stripped from the result.
     """
     title = gutils.regextrim(self.page, 'class="title-extra"[^>]*>', "<")
     if not title:
         title = gutils.regextrim(self.page, "<h1>", "([ ]|[&][#][0-9]+[;])<span")
     if not title:
         page_title = gutils.trim(self.page, "<title>", "</title>")
         title = re.sub(" [(].*", "", page_title)
     self.o_title = re.sub('"', "", title)
 def get_country(self):
     """Set ``self.country`` from the ``standardsmall`` span of ``self.tmp_page``.

     The span is narrowed to the ``<strong>`` text that follows a dash, and
     trailing digits are removed.  A ``None`` lookup result gives ''.
     """
     span_start = 'span class="standardsmall"[^>]*><strong>((DVD|VHS|Laser Disc|Video CD|Blue-ray Disc)</strong>[ \t]-[ \t]<strong>)*'
     found = gutils.regextrim(self.tmp_page, span_start, '</span>')
     if found is None:
         self.country = ''
         return
     found = gutils.regextrim(found, '-[ \t]<strong>', '</strong>')
     self.country = re.sub('[0-9]+$', '', found)
 def get_o_title(self):
     """Set ``self.o_title`` from ``self.tmp_page``.

     The parenthesised text in the ``standardsmall`` span is preferred.
     Otherwise the text of the ``headline2`` link is used; the link prefix
     is ``/videofilm`` when ``self.url_type`` is 'V', else ``/kinofilm``.
     """
     self.o_title = gutils.trim(self.tmp_page, 'span class="standardsmall">(', ')<')
     if self.o_title != '':
         return
     if self.url_type == 'V':
         link_start = 'headline2"[^>]*>[ \t\r\n]*<a href="/videofilm'
     else:
         link_start = 'headline2"[^>]*>[ \t\r\n]*<a href="/kinofilm'
     anchor = gutils.regextrim(self.tmp_page, link_start, '</a>')
     self.o_title = gutils.after(anchor, '>')
 def get_image(self):
     """Set ``self.image_url`` from the first source that yields a URL.

     Order: the ``"picture":`` value in ``self.page``; then either a
     ``flbilder`` link in the cover area (video page) or an
     ``images.kino.de`` link (kino page); finally the cover area of
     ``self.videopage`` when that page is available.
     """
     cover_link = '(http[:][/][/][^/]+[/]flbilder[/][^"\']+)'
     kino_prefix = 'http://images.kino.de/s/'

     picture = gutils.trim(self.page, '"picture":', ',')
     self.image_url = picture.replace('"', '').replace('\\', '')
     if self.image_url:
         return

     cover = gutils.regextrim(self.page, '<div class="cover-area">', '</div>')
     if cover:
         # video page
         found = re.search(cover_link, cover)
         if found:
             self.image_url = found.group(1)
     else:
         # kino page
         head = gutils.before(self.page, '<span style="line-height: 15px;">')
         if head:
             pieces = re.split(kino_prefix, head)
             # When two or more links exist the second one is used.
             if len(pieces) > 2:
                 self.image_url = kino_prefix + gutils.before(pieces[2], '"')
             elif len(pieces) > 1:
                 self.image_url = kino_prefix + gutils.before(pieces[1], '"')

     if self.image_url or not self.videopage:
         return
     cover = gutils.regextrim(self.videopage, '<div class="cover-area">', '</div>')
     if cover:
         # video page
         found = re.search(cover_link, cover)
         if found:
             self.image_url = found.group(1)
 def get_image(self):
     """Set ``self.image_url`` from the poster page linked in ``self.page``.

     The ``plakat.php?`` query found in ``self.page`` is used to fetch the
     poster page, and the ``bilder.filmdb.de`` path is then read from that
     fetched page.  ``self.image_url`` is left untouched when any step
     yields nothing.
     """
     query = gutils.regextrim(self.page, 'plakat.php[?]', '["\']')
     if not query:
         return
     page_image = self.open_page(url='http://www.filmdb.de/plakat.php?' + query)
     # Fix: the fetched poster page used to be ignored and ``self.page`` was
     # searched a second time.
     # NOTE(review): assumes open_page returns the page text (or a false
     # value on failure) - confirm against the base class.
     if not page_image:
         return
     path = gutils.regextrim(page_image, 'bilder.filmdb.de', '["\']')
     if path:
         self.image_url = 'http://bilder.filmdb.de' + path
Ejemplo n.º 12
0
 def get_classification(self):
     """Set ``self.classification`` to the FSK rating.

     Tried in order: the ``"fsk":`` value of ``self.page`` with double
     quotes removed, the ``FSK: `` text of ``self.page``, and the same text
     in ``self.videopage`` when that page is available.
     """
     rating = gutils.trim(self.page, '"fsk":', ',').replace('"', '')
     if not rating:
         rating = gutils.regextrim(self.page, 'FSK: ', '<')
     if not rating and self.videopage:
         rating = gutils.regextrim(self.videopage, 'FSK: ', '<')
     self.classification = rating
Ejemplo n.º 13
0
 def get_image(self):
     """Set ``self.image_url`` from the poster page linked in ``self.page``.

     The ``plakat.php?`` query found in ``self.page`` is used to fetch the
     poster page, and the ``bilder.filmdb.de`` path is then read from that
     fetched page.  ``self.image_url`` is left untouched when any step
     yields nothing.
     """
     query = gutils.regextrim(self.page, 'plakat.php[?]', '["\']')
     if not query:
         return
     page_image = self.open_page(
         url='http://www.filmdb.de/plakat.php?' + query)
     # Fix: the fetched poster page used to be ignored and ``self.page`` was
     # searched a second time.
     # NOTE(review): assumes open_page returns the page text (or a false
     # value on failure) - confirm against the base class.
     if not page_image:
         return
     path = gutils.regextrim(page_image, 'bilder.filmdb.de', '["\']')
     if path:
         self.image_url = 'http://bilder.filmdb.de' + path
 def get_notes(self):
     """Build ``self.notes`` from the 'Critica' and 'Note' sections of the page.

     Each section is optional.  ``<br>`` tags become line breaks, and the
     'Note' text is additionally passed through ``self.capwords``.
     """
     self.notes = ''

     critica_raw = gutils.regextrim(self.page, 'Critica</font>', "(</td>|\n|Note<)")
     critica = gutils.clean(critica_raw.replace('<br>', '\n'))
     if critica:
         self.notes = 'Critica:\n\n' + critica + '\n\n'

     note_raw = gutils.regextrim(self.page, 'Note</font>', "(</td>|\n|Critica<)")
     note = gutils.clean(note_raw.replace('<br>', '--BR--'))
     if note:
         # string.capwords removes line breaks, so they are protected with
         # the --BR-- placeholder; it is looked up in lower case afterwards
         # (presumably because capwords lower-cases it - verify).
         note = self.capwords(note)
         self.notes = self.notes + 'Note:\n\n' + note.replace('--br--', '\n')
Ejemplo n.º 15
0
 def get_classification(self):
     """Set ``self.classification`` to the US content rating.

     The ``contentRating`` meta tag of ``self.page`` is preferred.  Without
     it, ``self.cert_page`` is consulted: the certifications list when
     present, otherwise the older 'Certification:' block.
     """
     # until we can find a way to locate the user, we have to use the US-classification
     rating = gutils.trim(self.page, '<meta itemprop="contentRating" content="', '"')
     if not rating:
         cert_list = gutils.regextrim(self.cert_page, 'id="certifications-list"', '<\/ul>')
         if cert_list:
             rating = gutils.regextrim(cert_list, '>United States:', '<')
         else:  # the old way
             old_block = gutils.trim(self.cert_page, '>Certification:<', '</div>')
             rating = gutils.trim(old_block, '>USA:', '<')
     self.classification = rating
 def get_o_title(self):
     """Set ``self.o_title`` to the first non-empty of several lookups.

     Order: the 'Originaltitel' row of ``self.page``, the parenthesised
     ``<h1(`` text, the teaser div, the 'Originaltitel' paragraph of
     ``self.videopage`` (only if that page exists), and the ``<h1>`` text.
     """
     title = gutils.regextrim(self.page, '(<p>Originaltitel[:] |Originaltitel<[^>]+>)', '(</tr>|</p>)')
     if not title:
         title = gutils.trim(self.page, '<h1(', ')')
     if not title:
         title = gutils.trim(self.page, '<div class="teaser">', '</')
     if not title and self.videopage:
         title = gutils.trim(self.videopage, '<p>Originaltitel: ', '</p>')
     if not title:
         title = gutils.regextrim(self.page, '<h1>', '(</h1>|</span>)')
     self.o_title = title
Ejemplo n.º 17
0
 def get_searches(self):
     """Collect search hits into ``self.ids`` and ``self.titles``.

     On a '<title>Suche' page every ``hit.php3?hit=`` chunk with a movie id
     adds one id and one title.  On any other page only the id taken from
     the ``index.php3?id=`` link is appended; no title is added.
     """
     if self.page.find('<title>Suche') > 0:
         # The text before the first hit marker carries no result.
         for chunk in self.page.split("hit.php3?hit=")[1:]:
             if chunk == '':
                 continue
             movie_id = gutils.trim(chunk, 'movie-', '-')
             if movie_id == '':
                 continue
             self.ids.append(movie_id)
             link_text = gutils.regextrim(chunk, '>', '</[Aa]>')
             self.titles.append(gutils.strip_tags(link_text.replace('<br />', ' - ')))
     else:
         movie_id = gutils.regextrim(self.page, 'index[.]php3[?]id=', '("|;|\')')
         self.ids.append(movie_id)
Ejemplo n.º 18
0
    def get_plot(self):
        """Set ``self.plot``, preferring the localised pages over the IMDb ones.

        The '<h5>Trama:</h5>' block of ``self.page`` is the starting point;
        the ``swiki.2.1`` div of ``self.plot_page`` replaces it when present,
        and every ``plotpar`` paragraph of ``self.plot_page`` is appended.
        If that leaves the plot empty, the same is done with
        ``self.imdb_page`` and ``self.imdb_plot_page``.
        """
        # The former '<b class="ch">Resumen' lookup was removed: its result
        # was unconditionally overwritten by the assignment below.
        self.plot = gutils.trim(self.page, '<h5>Trama:</h5>', '</div>')
        self.plot = self.__before_more(self.plot)
        tmp = gutils.trim(self.plot_page, '<div id="swiki.2.1">', '</div>')
        if tmp:
            self.plot = tmp
        elements = self.plot_page.split('<p class="plotpar">')
        if len(elements) > 1:
            self.plot = self.plot + '\n\n'
            # The text before the first paragraph marker is not a paragraph.
            for element in elements[1:]:
                if element != '':
                    self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n'
        if not self.plot:
            # nothing found in the localised pages, try the original ones
            self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
            self.plot = self.__before_more(self.plot)
            elements = self.imdb_plot_page.split('<p class="plotpar">')
            if len(elements) > 1:
                self.plot = self.plot + '\n\n'
                for element in elements[1:]:
                    if element != '':
                        self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n\n'
 def get_cameraman(self):
     """Set ``self.cameraman`` from the cinematography row of ``self.creditspage``.

     ``<br />`` separators become ", " and a single trailing comma left
     after cleaning is removed.
     """
     credits_row = gutils.regextrim(self.creditspage, u"zdjęcia: <", "(</tr>|<tr>)")
     names = gutils.after(credits_row, ">").replace("<br />", ", ")
     names = gutils.clean(names)
     if names.endswith(","):
         names = names[:-1]
     self.cameraman = names
Ejemplo n.º 20
0
 def get_plot(self):
     """Set ``self.plot`` from the separate plot page linked in ``self.page``.

     The plot stays '' when the link lookup yields ``None`` or the plot
     page cannot be fetched.
     """
     self.plot = ''
     storyid = gutils.regextrim(self.page, '<a href="plot/', '(">|[&])')
     if storyid is None:
         return
     story_url = "https://ssl.ofdb.de/plot/%s" % (storyid.encode('utf8'))
     story_page = self.open_page(url=story_url)
     if story_page:
         self.plot = gutils.trim(story_page, "</b><br><br>", "</")
 def get_plot(self):
     """Set ``self.plot`` from the separate plot page linked in ``self.page``.

     The plot stays '' when the link lookup yields ``None`` or the plot
     page cannot be fetched.
     """
     self.plot = ''
     storyid = gutils.regextrim(self.page, '<a href="plot/', '(">|[&])')
     if storyid is None:
         return
     story_url = "http://www.ofdb.de/plot/%s" % (storyid.encode('utf8'))
     story_page = self.open_page(url=story_url)
     if story_page:
         self.plot = gutils.trim(story_page, "</b><br><br>", "</")
 def get_cast(self):
     """Set ``self.cast`` to one actor per line from the '(Darsteller)' block.

     ' als ' is replaced by the translated ' as ', tabs and line breaks are
     removed, ', ' becomes a line break and remaining commas are dropped.
     """
     cast = gutils.regextrim(self.page, '[(]Darsteller[)]', '(<[pP]>|<br><span[^>]+>)')
     cast = gutils.clean(cast)
     cast = cast.replace(' als ', _(' as '))
     cast = re.sub('( \t|\t|\r|\n)', '', cast)
     self.cast = cast.replace(', ', '\n').replace(',', '')
 def get_year(self):
     """Set ``self.year`` to the four-digit year in the ``standardsmall`` span.

     The year is the four digits directly before a ``</strong>`` inside the
     span of ``self.tmp_page``.  ``self.year`` stays '' when nothing matches.
     """
     self.year = ''
     tmp = gutils.regextrim(self.tmp_page, 'span class="standardsmall"[^>]*><strong>', '</span>')
     if tmp is not None:
         # Fix: only the captured digits are stored.  The whole match used
         # to be sliced out, which left '</strong>' appended to the year.
         srchresult = re.search('([0-9][0-9][0-9][0-9])</strong>', tmp)
         if srchresult is not None:
             self.year = srchresult.group(1)
 def get_searches(self):
     """Collect DVD and Blu-ray search hits into ``self.ids`` and ``self.titles``.

     The page is split on the media links.  Because the split pattern has a
     capturing group, the pieces alternate between the matched link prefix
     and the text that follows it.  Ids are prefixed with 'dvd/' or
     'blu-ray/'; titles get a '(<medium> - <details>)' suffix.
     """
     pieces = re.split('&nbsp;<a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)', self.page)
     for pos in range(1, len(pieces), 2):
         prefix = pieces[pos]
         element = pieces[pos + 1]
         if element is None:
             continue
         if prefix == '/datenbank/medien/blu-ray/':
             medium = 'Blu-Ray'
             self.ids.append('blu-ray/' + gutils.before(element, '"'))
         else:
             medium = 'DVD'
             self.ids.append('dvd/' + gutils.before(element, '"'))
         details = gutils.regextrim(element, '<div [^>]*>', '</div>')
         details = details.replace('<br>', ' - ').replace('&nbsp;', '')
         details = re.sub('[ \t\n]+', ' ', details)
         suffix = gutils.clean('(' + medium + ' - ' + details + ')')
         self.titles.append(gutils.trim(element, '>', '</a>') + suffix)
 def get_image(self):
     """Set ``self.image_url`` to the poster URL, or "" when none is found.

     The poster name is looked up in ``self.page`` under the
     ``images_locandine/<movie_id>/`` path.  The URL always ends in '.jpg'.
     """
     # Find the film's poster image
     path_start = "../images_locandine/%s/" % self.movie_id
     poster = gutils.regextrim(self.page, path_start, ".(JPG|jpg)\"")
     if poster == "":
         self.image_url = ""
     else:
         self.image_url = "http://www.cinematografo.it/bancadati/images_locandine/%s/%s.jpg" % (self.movie_id, poster)
Ejemplo n.º 26
0
 def get_searches(self):
     """Collect DVD and Blu-ray search hits into ``self.ids`` and ``self.titles``.

     The page is split on the media links.  Because the split pattern has a
     capturing group, the pieces alternate between the matched link prefix
     and the text that follows it.  Ids are prefixed with 'dvd/' or
     'blu-ray/'; titles get a '(<medium> - <details>)' suffix.
     """
     pieces = re.split('&nbsp;<a title="[^"]+" href="(/datenbank/medien/dvd/|/datenbank/medien/blu-ray/)', self.page)
     for pos in range(1, len(pieces), 2):
         prefix = pieces[pos]
         element = pieces[pos + 1]
         if element is None:
             continue
         if prefix == '/datenbank/medien/blu-ray/':
             medium = 'Blu-Ray'
             self.ids.append('blu-ray/' + gutils.before(element, '"'))
         else:
             medium = 'DVD'
             self.ids.append('dvd/' + gutils.before(element, '"'))
         details = gutils.regextrim(element, '<div [^>]*>', '</div>')
         details = details.replace('<br>', ' - ').replace('&nbsp;', '')
         details = re.sub('[ \t\n]+', ' ', details)
         suffix = gutils.clean('(' + medium + ' - ' + details + ')')
         self.titles.append(gutils.trim(element, '>', '</a>') + suffix)
 def get_plot(self):
     """Set ``self.plot`` from the cell that follows the ``showcover.php`` link.

     Typographic quotes and dashes are replaced by plain ones, then the
     '<n> Views' counter text is removed.
     """
     text = gutils.regextrim(self.page, 'showcover.php[^>]*>', '</td>')
     text = re.sub('[\x93„]', '"', text)
     text = re.sub('[–]', '-', text)
     self.plot = re.sub('[0-9 ]+Views', '', text)
Ejemplo n.º 28
0
 def get_cast(self):
     """Set ``self.cast`` to one actor per line from the '(Darsteller)' block.

     ' als ' is replaced by the translated ' as ', tabs and line breaks are
     removed, and every comma becomes a line break.
     """
     cast = gutils.regextrim(self.page, '[(]Darsteller[)]', '(</td>|<br><span[^>]+>)')
     cast = gutils.clean(cast)
     cast = cast.replace(' als ', _(' as '))
     cast = re.sub('( \t|\t|\r|\n)', '', cast)
     self.cast = cast.replace(',', '\n')
Ejemplo n.º 29
0
 def get_plot(self):
     """Set ``self.plot`` from the plot summaries, or from the page description.

     Every ``<li`` item of the summaries list in ``self.plot_page`` adds its
     paragraph and its tag-stripped author line.  If that gives nothing, the
     ``description`` text of ``self.page`` is used and the ``plotpar``
     paragraphs (or odd/even list items) of ``self.plot_page`` are appended.
     """
     summaries = gutils.trim(self.plot_page, 'id="plot-summaries-content">', '</ul>')
     plotcompilation = ''
     for item in summaries.split('<li'):
         # Skip empty pieces and the "no summaries yet" placeholder entry.
         if item == '' or 'It looks like we don\'t have any Plot Summaries for this title yet.' in item:
             continue
         author = gutils.trim(item, '<div class="author-container">', '</div>')
         author = re.sub('<[^<]+?>', '', author.replace('\n', '').lstrip())
         plotcompilation = plotcompilation + gutils.trim(item, '<p>', '</p>') + '\n'
         plotcompilation = plotcompilation + author + '\n\n'
     if plotcompilation != '':
         self.plot = plotcompilation
         return

     teaser = gutils.regextrim(self.page, 'itemprop="description"', '<')
     self.plot = gutils.after(teaser, '>')
     paragraphs = self.plot_page.split('<p class="plotpar">')
     if len(paragraphs) < 2:
         paragraphs = re.split('<li class="(?:odd|even)">', self.plot_page)
     if len(paragraphs) > 1:
         self.plot = self.plot + '\n\n'
         for paragraph in paragraphs[1:]:
             if paragraph != '':
                 self.plot = self.plot + gutils.strip_tags(gutils.before(paragraph, '</a>')) + '\n\n'
Ejemplo n.º 30
0
 def get_searches(self):
     """Collect search hits into ``self.ids`` and ``self.titles``.

     On a '<title>Suche' page every ``hit.php3?hit=`` chunk with a movie id
     adds one id and one title.  On any other page only the id taken from
     the ``index.php3?id=`` link is appended; no title is added.
     """
     if self.page.find('<title>Suche') > 0:
         # The text before the first hit marker carries no result.
         for chunk in self.page.split("hit.php3?hit=")[1:]:
             if chunk == '':
                 continue
             movie_id = gutils.trim(chunk, 'movie-', '-')
             if movie_id == '':
                 continue
             self.ids.append(movie_id)
             link_text = gutils.regextrim(chunk, '>', '</[Aa]>')
             self.titles.append(gutils.strip_tags(link_text.replace('<br />', ' - ')))
     else:
         movie_id = gutils.regextrim(self.page, 'index[.]php3[?]id=', '("|;|\')')
         self.ids.append(movie_id)
Ejemplo n.º 31
0
 def get_o_title(self):
     """Set ``self.o_title`` to the first non-empty of several lookups.

     Order: the 'Originaltitel' row of ``self.page``, the parenthesised
     ``<h1(`` text, the teaser div, the 'Originaltitel' paragraph of
     ``self.videopage`` (only if that page exists), and the ``<h1>`` text.
     """
     title = gutils.regextrim(
         self.page, '(<p>Originaltitel[:] |Originaltitel<[^>]+>)',
         '(</tr>|</p>)')
     if not title:
         title = gutils.trim(self.page, '<h1(', ')')
     if not title:
         title = gutils.trim(self.page, '<div class="teaser">', '</')
     if not title and self.videopage:
         title = gutils.trim(self.videopage, '<p>Originaltitel: ', '</p>')
     if not title:
         title = gutils.regextrim(self.page, '<h1>', '(</h1>|</span>)')
     self.o_title = title
 def get_o_title(self):
     """Set ``self.o_title`` to the first non-empty of four lookups in ``self.page``.

     Order: the 'Originaltitel' paragraph, the parenthesised text of the
     ``standardsmall`` span, the teaser div, and the ``<title>`` text up to
     a '|' or the closing tag.
     """
     title = gutils.trim(self.page, '<p>Originaltitel: ', '</p>')
     if not title:
         title = gutils.trim(self.page, '<span class="standardsmall">(', ')')
     if not title:
         title = gutils.trim(self.page, '<div class="teaser">', '</')
     if not title:
         title = gutils.regextrim(self.page, '<title>', '([|]|</title>)')
     self.o_title = title
Ejemplo n.º 33
0
 def get_o_title(self):
     """Set ``self.o_title`` from the 'Originaltitel' entry, capitalised per word.

     Falls back to the text of the ``film-titel`` heading when the entry
     is empty.
     """
     raw = gutils.regextrim(self.page, '<b>Originaltitel:', '(</p>|<b>)')
     self.o_title = string.capwords(gutils.clean(raw))
     if not self.o_title:
         heading = gutils.trim(self.page, 'class=\'film-titel\'', '</h1>')
         self.o_title = gutils.after(heading, '>')
Ejemplo n.º 34
0
 def get_cameraman(self):
     """Set ``self.cameraman`` from the cinematography row of ``self.creditspage``.

     ``<br />`` separators become ', ' and a single trailing comma left
     after cleaning is removed.
     """
     credits_row = gutils.regextrim(self.creditspage, u'zdjęcia: <', '(</tr>|<tr>)')
     names = gutils.after(credits_row, '>').replace('<br />', ', ')
     names = gutils.clean(names)
     if names.endswith(','):
         names = names[:-1]
     self.cameraman = names
Ejemplo n.º 35
0
 def get_o_title(self):
     """Set ``self.o_title`` from ``self.o_page``.

     Order: the ``title-extra`` span ending in '(original title)', the
     ``<h1>`` text up to its ``<span``, then the ``<title>`` text up to the
     first '('.
     """
     title = gutils.trim(self.o_page, '<span class="title-extra">', '<i>(original title)</i>')
     if title == '':
         title = gutils.regextrim(self.o_page, '<h1>', '([ ]|[&][#][0-9]+[;])<span')
     if title == '':
         page_title = gutils.trim(self.o_page, '<title>', '</title>')
         title = re.sub('[(].*', '', page_title)
     self.o_title = title
 def get_title(self):
     """Set ``self.title``, preferring an alternative title tagged '[de]'.

     The ``<h1>`` text is the default.  When the alternative-titles block
     holds ``transl`` entries, the first non-empty '[de]' entry (cut at its
     first '(') replaces it.
     """
     self.title = gutils.trim(self.page, '<h1>', '<span')
     aka_block = gutils.regextrim(self.page, '<h5>(Alternativ|Auch bekannt als):', '</div>')
     candidates = aka_block.split('<i class="transl"')
     if len(candidates) <= 1:
         return
     for candidate in candidates:
         german = gutils.before(gutils.trim(candidate, '>', '[de]'), '(')
         if german != '':
             self.title = german
             break
Ejemplo n.º 37
0
 def get_runtime(self):
     """Set ``self.runtime`` from the 'Runtime' entry of ``self.page``.

     A value of the form '<hours>h <minutes>' is converted to an integer
     number of minutes.  Any other value is kept as the scraped text.
     """
     self.runtime = gutils.strip_tags(
         gutils.regextrim(self.page, 'Runtime<[^>]+>', 'min<'))
     parts = self.runtime.split('h ')
     if len(parts) > 1:
         try:
             self.runtime = int(parts[0]) * 60 + int(parts[1])
         except ValueError:
             # Not numeric after all: keep the raw text.  Only the
             # conversion error is ignored, instead of every exception
             # as with the former bare ``except``.
             pass
 def get_o_title(self):
     """Set ``self.o_title`` from the '(Originaltitel)' entry of ``self.page``.

     Only the text after the last comma is kept (when the comma is not the
     first character), then it is capitalised per word.  If the result is
     empty, the text of the first ``text_ergebniss_faz_3`` link is used.
     """
     block = gutils.regextrim(self.page, 'class="text_ergebniss_faz_3"', '[ \t]+[(]Originaltitel[)]')
     title = gutils.clean(gutils.after(block, '</a>'))
     comma = title.rfind(',')
     if comma > 0:
         title = title[comma + 1:]
     title = string.capwords(title)
     if title == '':
         link = gutils.trim(self.page, 'class="text_ergebniss_faz_3"', '</a>')
         title = gutils.after(link, '>')
     self.o_title = title
Ejemplo n.º 39
0
 def get_title(self):
     """Set ``self.title``, preferring an alternative title tagged '[de]'.

     The ``<h1>`` text is the default.  When the alternative-titles block
     holds ``transl`` entries, the first non-empty '[de]' entry (cut at its
     first '(') replaces it.
     """
     self.title = gutils.trim(self.page, '<h1>', '<span')
     aka_block = gutils.regextrim(self.page, '<h5>(Alternativ|Auch bekannt als):', '</div>')
     candidates = aka_block.split('<i class="transl"')
     if len(candidates) <= 1:
         return
     for candidate in candidates:
         german = gutils.before(gutils.trim(candidate, '>', '[de]'), '(')
         if german != '':
             self.title = german
             break
 def get_studio(self):
     """Set ``self.studio`` to the comma-separated production companies.

     Names are the link texts of the 'Production Companies' list in
     ``self.comp_page``; empty texts are skipped.
     """
     section = gutils.regextrim(self.comp_page, 'Production Companies<[^>]+', '</ul>')
     names = [gutils.trim(link, '>', '<') for link in section.split('href="')]
     self.studio = ', '.join([name for name in names if name])
 def get_cameraman(self):
     """Set ``self.cameraman`` to the comma-separated cinematographers.

     Names are the link texts of the 'Cinematography by' table in
     ``self.cast_page``; empty texts are skipped.
     """
     section = gutils.regextrim(self.cast_page, 'Cinematography by<[^>]+', '</table>')
     names = [gutils.trim(link, '>', '<') for link in section.split('href="')]
     self.cameraman = ', '.join([name for name in names if name])
Ejemplo n.º 42
0
 def get_studio(self):
     """Set ``self.studio`` to the comma-separated production companies.

     The 'Production Company' block of ``self.page`` is split on its
     ``/db/companies`` links.  The cleaned text of every piece is collected
     and empty results are skipped.
     """
     section = gutils.regextrim(self.page, '>Production Company<', '(<B>|</TABLE>)')
     names = []
     for piece in re.split('(href|HREF)="/db/companies', section):
         name = gutils.clean(gutils.trim(piece, '>', '<'))
         if name:
             names.append(name)
     self.studio = ', '.join(names)
 def get_plot(self):
     """Set ``self.plot`` from the 'Plot:' block plus the plot-page paragraphs.

     The block from ``self.page`` is passed through ``__before_more``; each
     ``plotpar`` paragraph of ``self.plot_page`` is appended with its tags
     stripped, followed by a blank line.
     """
     self.plot = gutils.regextrim(self.page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
     self.plot = self.__before_more(self.plot)
     paragraphs = self.plot_page.split('<p class="plotpar">')
     if len(paragraphs) > 1:
         self.plot = self.plot + '\n\n'
         # The text before the first paragraph marker is not a paragraph.
         for paragraph in paragraphs[1:]:
             if paragraph != '':
                 text = gutils.strip_tags(gutils.before(paragraph, '</a>'))
                 self.plot = self.plot + text + '\n\n'
 def get_screenplay(self):
     """Set ``self.screenplay`` from the first source that yields a value.

     Order: the 'Buch:' table cell of ``self.page``, the 'Drehbuch:' row of
     ``self.creditspage``, then the names after each 'Drehbuch' heading in
     ``self.page``, joined with ', '.
     """
     self.screenplay = gutils.regextrim(self.page, '<th>Buch:', '<th>')
     if not self.screenplay:
         self.screenplay = gutils.trim(self.creditspage, 'Drehbuch:&nbsp;', '</tr>')
     if not self.screenplay:
         sections = re.split('<h3>Drehbuch</h3>', self.page)[1:]
         names = [gutils.trim(section, 'itemprop="name">', '<') for section in sections]
         if names:
             self.screenplay = self.screenplay + ', '.join(names)
 def get_cameraman(self):
     """Set ``self.cameraman`` from the first source that yields a value.

     Order: the 'Kamera:' table cell of ``self.page``, the 'Kamera' row of
     ``self.creditspage``, then the names after each 'Kamera' function
     title in ``self.page``, joined with ', '.
     """
     self.cameraman = gutils.regextrim(self.page, '<th>Kamera:', '(<th>|</table>)')
     if not self.cameraman:
         self.cameraman = gutils.trim(self.creditspage, 'Kamera&nbsp;', '</tr>')
     if not self.cameraman:
         sections = re.split('"function-title">Kamera</span>', self.page)[1:]
         names = [gutils.trim(section, 'itemprop="name">', '<') for section in sections]
         if names:
             self.cameraman = self.cameraman + ', '.join(names)
Ejemplo n.º 46
0
 def get_cameraman(self):  # OK v0.1
     """Set ``self.cameraman`` to the comma-separated names of the 'Image' table.

     Names are the link texts found in that table of ``self.cast_page``;
     empty texts are skipped.
     """
     section = gutils.regextrim(self.cast_page, 'Image<[^>]+', '</table>')
     names = [gutils.trim(link, '>', '<') for link in section.split('href="')]
     self.cameraman = ', '.join([name for name in names if name])
Ejemplo n.º 47
0
 def get_studio(self):
     """Set ``self.studio`` to the comma-separated production companies.

     Names are the link texts of the 'Production Companies' list in
     ``self.comp_page``; empty texts are skipped.
     """
     section = gutils.regextrim(self.comp_page, 'Production Companies<[^>]+',
                                '</ul>')
     names = [gutils.trim(link, '>', '<') for link in section.split('href="')]
     self.studio = ', '.join([name for name in names if name])
Ejemplo n.º 48
0
 def get_cameraman(self):
     """Set ``self.cameraman`` to the comma-separated cinematographers.

     Names are the link texts of the 'Cinematography by' table in
     ``self.cast_page``, with line breaks removed and whitespace stripped;
     empty names are skipped.
     """
     section = gutils.regextrim(self.cast_page, '>Cinematography by', '</table>')
     names = []
     # The text before the first link is not a name.
     for link in section.split('href="')[1:]:
         name = gutils.trim(link, '>', '<').replace('\n', '').strip()
         if name:
             names.append(name)
     self.cameraman = ', '.join(names)
Ejemplo n.º 49
0
 def get_o_title(self):
     """Set ``self.o_title`` to the first non-empty of four lookups in ``self.page``.

     Order: the ``originalTitle`` div, the ``<h1 itemprop="name">`` text,
     the ``og:title`` meta content, and the ``<title>`` text cut at `` (``.
     Double quotes are removed and the result is cleaned.
     """
     # it seems, that films coming from the German branch can have their German title in the h1-name-tag;
     # in this case (only?), IMDB renders an additional "originalTitle"-tag.
     title = gutils.trim(self.page, '<div class="originalTitle">', '<span')
     if not title:
         title = gutils.regextrim(self.page, '<h1 itemprop="name"[^>]*>', '&nbsp;')
     if not title:
         title = gutils.trim(self.page, 'og:title\' content="', '"')
     if not title:
         page_title = gutils.trim(self.page, '<title>', '</title>')
         title = re.sub(' [(].*', '', page_title)
     self.o_title = gutils.clean(re.sub('"', '', title))
Ejemplo n.º 50
0
 def get_o_title(self):
     """Set ``self.o_title`` to the first non-empty of several lookups.

     Order: the 'Originaltitel' paragraph of ``self.page``, the
     parenthesised ``<h1(`` text, the teaser div, the 'Originaltitel'
     paragraph of ``self.videopage`` (only if that page exists), and the
     ``<h1>`` text.
     """
     title = gutils.trim(self.page, '<p>Originaltitel: ', '</p>')
     if not title:
         title = gutils.trim(self.page, '<h1(', ')')
     if not title:
         title = gutils.trim(self.page, '<div class="teaser">', '</')
     if not title and self.videopage:
         title = gutils.trim(self.videopage, '<p>Originaltitel: ', '</p>')
     if not title:
         title = gutils.regextrim(self.page, '<h1>', '</h1>')
     self.o_title = title
 def get_cameraman(self):
     """Set ``self.cameraman`` to the comma-separated cinematographers.

     Names are the link texts of the "Cinematography by" table in
     ``self.cast_page``, with line breaks removed and whitespace stripped;
     empty names are skipped.
     """
     section = gutils.regextrim(self.cast_page, ">Cinematography by", "</table>")
     names = []
     # The text before the first link is not a name.
     for link in section.split('href="')[1:]:
         name = gutils.trim(link, ">", "<").replace("\n", "").strip()
         if name:
             names.append(name)
     self.cameraman = ", ".join(names)
 def get_studio(self):
     """Set ``self.studio`` to the comma-separated production companies.

     Names are the link texts of the ``name="production"`` list in
     ``self.comp_page``, with line breaks removed and whitespace stripped;
     empty names are skipped.
     """
     section = gutils.regextrim(self.comp_page, 'name="production"', "</ul>")
     names = []
     # The text before the first link is not a name.
     for link in section.split('href="')[1:]:
         name = gutils.trim(link, ">", "<").replace("\n", "").strip()
         if name:
             names.append(name)
     self.studio = ", ".join(names)
 def get_cast(self):
     """Set ``self.cast`` from the 'Attori' block, one entry per line.

     Link and row markup is rewritten into line breaks and the translated
     ' as ' separator before cleaning; afterwards runs of spaces are
     collapsed and spaces after a line break are removed.
     """
     # Find the actors. Try to make it comma separated.
     cast = gutils.regextrim(self.page, ">Attori</font>", '(<font class="fontViolaB">|\n)')
     markup_rewrites = (
         ("target='_self'>", "\n>"),
         ("<a>", _(" as ").encode('utf8')),
         ("</tr><tr>", '\n'),
         ("...vedi il resto del cast", ''),
     )
     for old, new in markup_rewrites:
         cast = cast.replace(old, new)
     cast = gutils.clean(cast)
     cast = cast.replace("&nbsp;&nbsp;", ' ')
     cast = re.sub('[ ]+', ' ', cast)
     self.cast = re.sub('\n[ ]+', '\n', cast)
Ejemplo n.º 54
0
 def get_o_title(self):  # OK v0.1
     """Set ``self.o_title`` to the first non-empty of three lookups in ``self.page``.

     Order: the first double-quoted text in the 'Alias:' info block, the
     ``<h1>`` text up to its ``<span``, then the ``<title>`` text cut
     at `` (``.
     """
     #~ self.o_title = gutils.trim(self.page, 'class="title-extra">', '<')
     alias_block = gutils.trim(self.page, '<h5>Alias:</h5><div class="info-content">', '</div>')
     title = gutils.trim(alias_block, '"', '"')
     if not title:  # same conditions as title
         title = gutils.regextrim(self.page, '<h1>', '([ ]|[&][#][0-9]+[;])<span')
     if not title:
         page_title = gutils.trim(self.page, '<title>', '</title>')
         title = re.sub(' [(].*', '', page_title)
     self.o_title = title
Ejemplo n.º 55
0
 def get_studio(self):
     """Set ``self.studio`` to the comma-separated production companies.

     Names are the link texts of the ``name="production"`` list in
     ``self.comp_page``, with line breaks removed and whitespace stripped;
     empty names are skipped.
     """
     section = gutils.regextrim(self.comp_page, 'name="production"', '</ul>')
     names = []
     # The text before the first link is not a name.
     for link in section.split('href="')[1:]:
         name = gutils.trim(link, '>', '<').replace('\n', '').strip()
         if name:
             names.append(name)
     self.studio = ', '.join(names)
Ejemplo n.º 56
0
 def get_plot(self):
     """Set ``self.plot`` from the 'Plot:' block plus the plot-page paragraphs.

     The block from ``self.page`` is passed through ``__before_more``; each
     ``plotpar`` paragraph of ``self.plot_page`` is appended with its tags
     stripped, followed by a blank line.
     """
     self.plot = gutils.regextrim(self.page, '<h5>Plot:</h5>',
                                  '(</div>|<a href.*)')
     self.plot = self.__before_more(self.plot)
     paragraphs = self.plot_page.split('<p class="plotpar">')
     if len(paragraphs) > 1:
         self.plot = self.plot + '\n\n'
         # The text before the first paragraph marker is not a paragraph.
         for paragraph in paragraphs[1:]:
             if paragraph != '':
                 text = gutils.strip_tags(gutils.before(paragraph, '</a>'))
                 self.plot = self.plot + text + '\n\n'