Python ct 예제들, beautifulsoup.ct Python 예제들

예제 #1

0

파일 보기

파일: import_wff.py 프로젝트: thuvh/filmmaster

    def parse_film(self, soup):
        """Extract film metadata from a parsed film-detail page.

        Args:
            soup: parsed page exposing ``find`` / ``findAll``
                (presumably a BeautifulSoup document -- confirm).

        Returns:
            dict with the keys ``title``, ``title_localized``,
            ``directors`` (list of non-empty, stripped names), ``times``
            (one ``self.parse_t`` result per screening text), ``year``
            (text of the 'Rok produkcji' row, or None) and ``img_url``
            (``href`` of the first image's parent element, or None).
        """
        belka = soup.find(ct('ps_belka'))
        title_pl = tag_text(belka.find('h2'))
        title = tag_text(belka.find('h4'))

        # Each row reads "label: value". Rows without a colon carry no
        # property, so they are skipped rather than stored under a None key.
        props = {}
        for row in soup.findAll(ct('row')):
            parts = tag_text(row).split(':', 1)
            if len(parts) == 2:
                props[parts[0].strip()] = parts[1].strip()

        # The first <li> of the screenings list is not a screening entry
        # (presumably a header row -- confirm against the page markup).
        listing = soup.find(ct('pokazy'))
        items = listing.findAll('li')[1:] if listing else []
        screenings = [tag_text(item) for item in items]

        img = soup.find(ct('ps_body')).find('img')
        img_url = img.parent['href'] if img else None

        # Build a real list: filter() is a one-shot iterator on Python 3.
        directors = [
            name
            for name in (part.strip()
                         for part in props.get(u'Reżyser', '').split(','))
            if name
        ]

        return {
            'title': title,
            'title_localized': title_pl,
            'directors': directors,
            'times': [self.parse_t(t) for t in screenings],
            'year': props.get('Rok produkcji'),
            'img_url': img_url,
        }

예제 #2

0

파일 보기

파일: import_lff.py 프로젝트: thuvh/filmmaster

 def parse_t(self, t):
     """Parse one performance row into a screening tuple.

     Args:
         t: element containing ``perf_date``, ``perf_time`` and
             ``perf_venue`` children.

     Returns:
         ``(theater, localized_datetime, venue)``, or None when the date
         or time text cannot be parsed or no theater matches the venue.
         A warning is logged in both None cases.
     """
     date = tag_text(t.find(ct('perf_date')))
     time_text = tag_text(t.find(ct('perf_time')))
     # Raw strings keep the regex escapes from being read as (invalid)
     # string escapes.
     day_match = re.match(r"\w+\s*(\d+)", date)
     time_match = re.match(r"(\d+):(\d+)", time_text)
     if day_match is None or time_match is None:
         # Treated like the "no theater" case below instead of crashing
         # on ``None.group``.
         logger.warning('cannot parse screening time from %r / %r',
                        date, time_text)
         return None

     venue = tag_text(t.find(ct('perf_venue'))).strip()
     theater = self.get_theater(venue)
     if not theater:
         logger.warning('no theater found for %r', venue)
         return None

     h, m = time_match.groups()
     when = self.get_screening_datetime(h, m, day_match.group(1))
     return theater, pytz.timezone(theater.timezone_id).localize(when), venue

예제 #3

0

파일 보기

 def parse_t(self, t):
     """Turn a performance row into ``(theater, datetime, venue)``.

     The day number is read from the ``perf_date`` text, hour and minute
     from the ``perf_time`` text, and the theater is looked up from the
     stripped ``perf_venue`` text via ``self.get_theater``.

     Args:
         t: element holding the ``perf_date`` / ``perf_time`` /
             ``perf_venue`` children.

     Returns:
         A tuple of the theater, the screening datetime localized to the
         theater's ``timezone_id``, and the venue text. None (after a
         logged warning) if the date/time text does not match the
         expected pattern or the venue has no known theater.
     """
     date = tag_text(t.find(ct('perf_date')))
     # Patterns are raw strings so "\w" / "\d" are regex escapes, not
     # invalid string escapes.
     day_found = re.match(r"\w+\s*(\d+)", date)
     if day_found is None:
         logger.warning('no day found in screening date %r', date)
         return None

     clock = tag_text(t.find(ct('perf_time')))
     clock_found = re.match(r"(\d+):(\d+)", clock)
     if clock_found is None:
         logger.warning('no time found in screening time %r', clock)
         return None

     venue = tag_text(t.find(ct('perf_venue'))).strip()
     theater = self.get_theater(venue)
     if not theater:
         logger.warning('no theater found for %r', venue)
         return None

     day = day_found.group(1)
     h, m = clock_found.groups()
     screening = self.get_screening_datetime(h, m, day)
     zone = pytz.timezone(theater.timezone_id)
     return theater, zone.localize(screening), venue

예제 #4

0

파일 보기

파일: import_lff.py 프로젝트: thuvh/filmmaster

 def films(self):
     """Yield a parsed film dict for every titled entry on the calendar.

     Requests ``/lff/calendar/<n>`` for n from 20111012 up to and
     including 20111027 (presumably YYYYMMDD dates -- confirm). For each
     time block on a page, the show times are parsed with
     ``self.parse_t``, the linked detail page is fetched and parsed with
     ``self.parse_film``, and ``'times'`` in that result is replaced by
     the show times parsed here. Entries with a falsy title are skipped.
     """
     for day in range(20111012, 20111028):
         calendar = self.get_soup('/lff/calendar/%d' % day)
         for block in calendar.findAll(ct('calendar-teaser-timeblock')):
             times = []
             for row in block.findAll(ct('perf_row')):
                 parsed_time = self.parse_t(row)
                 # Falsy parse_t results are dropped.
                 if parsed_time:
                     times.append(parsed_time)
             link = block.find(ct('show_title')).find('a')
             film = self.parse_film(self.get_soup(link['href']))
             film['times'] = times
             if not film.get('title'):
                 continue
             yield film

예제 #5

0

파일 보기

 def films(self):
     """Generate film dicts scraped from the calendar pages.

     Iterates the paths ``/lff/calendar/20111012`` ..
     ``/lff/calendar/20111027``. Every time block found on a page yields
     one dict produced by ``self.parse_film`` on the block's detail page,
     with its ``'times'`` entry set to the truthy ``self.parse_t`` results
     for the block's performance rows. Blocks whose parsed dict has no
     truthy ``'title'`` yield nothing.
     """
     for day_number in range(20111012, 20111028):
         page = self.get_soup('/lff/calendar/%d' % day_number)
         for teaser in page.findAll(ct('calendar-teaser-timeblock')):
             rows = teaser.findAll(ct('perf_row'))
             # Keep only the rows parse_t could turn into a screening.
             screenings = [
                 result
                 for result in [self.parse_t(row) for row in rows]
                 if result
             ]
             detail_href = teaser.find(ct('show_title')).find('a')['href']
             detail_page = self.get_soup(detail_href)
             film = self.parse_film(detail_page)
             film['times'] = screenings
             if film.get('title'):
                 yield film

예제 #6

0

파일 보기

    def parse_film(self, soup):
        """Extract title, directors, year and synopsis from a film page.

        Args:
            soup: parsed page exposing ``find`` / ``findAll``
                (presumably a BeautifulSoup document -- confirm).

        Returns:
            dict with ``title`` (None when the header element is
            missing), ``directors`` (list of non-empty, stripped names),
            ``year`` (int, or None when absent, non-numeric or zero) and
            ``synopsis``.
        """
        header = soup.find(id="header-one-films")
        title = tag_text(header.find('h1')) if header else None

        # Credits are rendered as left (label) / right (value) pairs.
        props = {}
        for item in soup.findAll(ct('screening-with-credits-item')):
            label = tag_text(item.find(ct('screening-with-credits-left')))
            value = tag_text(item.find(ct('screening-with-credits-right')))
            props[label] = value

        # Build a real list: filter() is a one-shot iterator on Python 3.
        directors = [
            name
            for name in (part.strip()
                         for part in props.get('Director', '').split(','))
            if name
        ]
        synopsis = tag_text(soup.find(ct('program-item-alternatetitle')))

        year = None
        raw_year = props.get('Year')
        if raw_year:
            try:
                # A parsed value of 0 is reported as None, as before.
                year = int(raw_year) or None
            except ValueError:
                year = None

        return {
            'title': title,
            'directors': directors,
            'year': year,
            'synopsis': synopsis,
        }

예제 #7

0

파일 보기

파일: import_wff.py 프로젝트: manlan2/filmaster

 def films(self):
     """Yield a parsed film for every entry of the alphabetical listing.

     Visits ``/filmy/wszystkie/<part>/<subpage>/`` for ``part`` in
     'special' followed by 'a'..'z' and ``subpage`` in 0..9. Paging
     within a part stops at the first subpage that lists no films. Each
     listed film's detail page is fetched and passed to
     ``self.parse_film``.
     """
     sections = ['special'] + [chr(ord('a') + i) for i in range(26)]
     for part in sections:
         for subpage in range(10):
             listing = self.get_soup(
                 '/filmy/wszystkie/%s/%s/' % (part, subpage))
             entries = listing.findAll(ct('nowina'))
             if not entries:
                 # An empty subpage ends this part.
                 break
             for entry in entries:
                 detail_path = '/' + entry.find('a')['href']
                 yield self.parse_film(self.get_soup(detail_path))

예제 #8

0

파일 보기

파일: import_wff.py 프로젝트: thuvh/filmmaster

 def films(self):
     """Generate parsed films from the paged, per-letter film index.

     For the part 'special' and then each letter 'a' through 'z', up to
     ten subpages (0-9) of ``/filmy/wszystkie/<part>/<subpage>/`` are
     fetched. A subpage without any film entries ends that part early.
     Every entry's link is followed and the detail page is handed to
     ``self.parse_film``, whose result is yielded.
     """
     letters = [chr(code) for code in range(ord('a'), ord('a') + 26)]
     for part in ['special'] + letters:
         subpage = 0
         while subpage < 10:
             path = '/filmy/wszystkie/%s/%s/' % (part, subpage)
             found = self.get_soup(path).findAll(ct('nowina'))
             if not found:
                 # Nothing listed here, so move on to the next part.
                 break
             for film_entry in found:
                 href = film_entry.find('a')['href']
                 page = self.get_soup('/' + href)
                 yield self.parse_film(page)
             subpage += 1

예제 #9

0

파일 보기

파일: import_lff.py 프로젝트: thuvh/filmmaster

    def parse_film(self, soup):
        """Read a film's title, directors, year and synopsis from its page.

        Args:
            soup: parsed film page supporting ``find`` / ``findAll``
                (presumably BeautifulSoup -- confirm).

        Returns:
            dict with keys:
              ``title``     -- header ``h1`` text, or None without a header
              ``directors`` -- list of stripped, non-empty director names
              ``year``      -- int year, or None if missing, not a
                               number, or zero
              ``synopsis``  -- text of the alternate-title element
        """
        header = soup.find(id="header-one-films")
        title = tag_text(header.find('h1')) if header else None

        credits = soup.findAll(ct('screening-with-credits-item'))
        # Map each credit label (left cell) to its value (right cell).
        props = dict(
            (tag_text(credit.find(ct('screening-with-credits-left'))),
             tag_text(credit.find(ct('screening-with-credits-right'))))
            for credit in credits
        )

        stripped = [d.strip() for d in props.get('Director', '').split(',')]
        # A list comprehension, not filter(): callers get a reusable list
        # on Python 3 as well as Python 2.
        directors = [d for d in stripped if d]
        synopsis = tag_text(soup.find(ct('program-item-alternatetitle')))

        raw_year = props.get('Year')
        try:
            # ``or None`` keeps a parsed 0 mapped to None, as before.
            year = (int(raw_year) or None) if raw_year else None
        except ValueError:
            year = None

        return {
            'title': title,
            'directors': directors,
            'year': year,
            'synopsis': synopsis,
        }

예제 #10

0

파일 보기

파일: import_wff.py 프로젝트: manlan2/filmaster

    def parse_film(self, soup):
        """Build a film dict from a parsed film-detail page.

        Args:
            soup: parsed page supporting ``find`` / ``findAll``
                (presumably BeautifulSoup -- confirm).

        Returns:
            dict with keys:
              ``title``           -- text of the ``h4`` in the title bar
              ``title_localized`` -- text of the ``h2`` in the title bar
              ``directors``       -- list of stripped, non-empty names
                                     from the u'Reżyser' row
              ``times``           -- ``self.parse_t`` result for each
                                     screening text
              ``year``            -- text of the 'Rok produkcji' row,
                                     or None
              ``img_url``         -- ``href`` of the first image's parent
                                     element, or None when there is no
                                     image
        """
        belka = soup.find(ct('ps_belka'))
        title_pl = tag_text(belka.find('h2'))
        title = tag_text(belka.find('h4'))

        # Rows look like "label: value". Only rows that actually contain
        # a colon contribute a property; no None key is stored.
        props = {}
        for row in soup.findAll(ct('row')):
            label, colon, value = tag_text(row).partition(':')
            if colon:
                props[label.strip()] = value.strip()

        screenings = []
        pokazy = soup.find(ct('pokazy'))
        if pokazy:
            # The first <li> is skipped (presumably a heading -- confirm).
            screenings = [tag_text(li) for li in pokazy.findAll('li')[1:]]

        img = soup.find(ct('ps_body')).find('img')
        img_url = img.parent['href'] if img else None

        names = [n.strip() for n in props.get(u'Reżyser', '').split(',')]

        return {
            'title': title,
            'title_localized': title_pl,
            # A list rather than filter(), which is lazy on Python 3.
            'directors': [n for n in names if n],
            'times': [self.parse_t(t) for t in screenings],
            'year': props.get('Rok produkcji'),
            'img_url': img_url,
        }