Example #1
0
    movies.append(movie)
# Labels looked for in the technical sheet, paired with the Movie attribute
# each one fills and whether its value is split on commas into a list.
detail_fields = (
    ('Género', 'genre', False),
    ('Director', 'director', False),
    ('Actores', 'cast', True),
    ('Duración', 'duration', False),
)

# Open each movie's own page and scrape its synopsis, its technical sheet
# and the label of every showtime combination listed per room panel.
for movie in movies:
    driver.get(movie.link)
    print('Processing ', movie.title)

    synopsis_box = driver.find_element_by_css_selector('div#sinopsis')
    movie.synopsis = synopsis_box.text

    # The sheet is read from the first <p> under div#tecnicos; its raw HTML
    # is cut into entries at every <br>.
    sheet = driver.find_element_by_css_selector('div#tecnicos > p')
    sheet_html = sheet.get_attribute('innerHTML')
    for entry in sheet_html.split('<br>'):
        # Every label is tested independently, so one entry may fill
        # several attributes.
        for label, attr_name, is_list in detail_fields:
            if label not in entry:
                continue
            # Take the segment that follows the first ': ' in the entry.
            raw = entry.split(': ')[1]
            setattr(movie, attr_name, raw.split(',') if is_list else raw)

    room_panels = driver.find_elements_by_css_selector(
        '.accordion > div.card.panel')
    for r in room_panels:
        room = r.find_element_by_css_selector('h2.panel-title')
        combinations = r.find_elements_by_css_selector(
            '.movie-showtimes-component-combination')
        for t in combinations:
            label_node = t.find_element_by_css_selector(
                '.movie-showtimes-component-label small')
            # The label's HTML is split on the bullet character and each
            # part is trimmed of surrounding whitespace.
            type_data = [
                part.strip()
                for part in label_node.get_attribute('innerHTML').split('•')
            ]
            subtitled = 'Subtitulado' in type_data[2]
            room_with_type = ' '.join((room.text, type_data[0], type_data[1]))
Example #2
0
# NOTE(review): nothing in the visible lines adds `movie` to this list --
# confirm that it is appended further down.
movies = []

# Movie attribute -> name of the data-* attribute on the tag that supplies it.
tag_fields = (
    ('actors', 'data-actors'),        # leading actors
    ('director', 'data-director'),    # director
    ('duration', 'data-duration'),    # running time
    ('rate', 'data-rate'),            # Douban rating
    ('region', 'data-region'),        # release region
    ('release', 'data-release'),      # release date
    ('title', 'data-title'),          # movie title
    ('trailer', 'data-trailer'),      # related video URL
)

# Build one Movie per tag, copying the data-* attributes in the order above.
for tag in tags:
    movie = Movie()
    for field_name, data_key in tag_fields:
        setattr(movie, field_name, tag.attrs[data_key])
    # Movie cover image, taken from the tag's <img> src.
    movie.cover = tag.img.attrs["src"]
    # Douban page URL, taken from the tag's <a> href.
    movie.threadUrl = tag.a.attrs["href"]