# NOTE(review): this statement is the tail of a loop that begins before the
# visible part of the file; its original indentation cannot be recovered here.
movies.append(movie)

# Second pass: open each movie's own page and fill in the detail fields.
# `driver` is presumably a Selenium WebDriver created earlier - confirm.
for movie in movies:
    driver.get(movie.link)
    print('Processing ', movie.title)

    # Synopsis is the visible text of the element with id "sinopsis".
    movie.synopsis = driver.find_element_by_css_selector('div#sinopsis').text

    # The technical details are read as raw HTML from the first <p> directly
    # under div#tecnicos, then split on '<br>' into one chunk per field.
    datos = driver.find_element_by_css_selector(
        'div#tecnicos > p').get_attribute('innerHTML')
    for dato in datos.split('<br>'):
        # Each chunk is matched by its Spanish label; the stored value is the
        # piece that follows the first ': ' in the chunk.
        # NOTE(review): a chunk containing a label but no ': ' raises
        # IndexError on the [1] lookup.
        if 'Género' in dato:  # genre
            movie.genre = dato.split(': ')[1]
        if 'Director' in dato:  # director
            movie.director = dato.split(': ')[1]
        if 'Actores' in dato:  # actors
            # Cast is kept as a list; items are split on ',' and not stripped.
            movie.cast = dato.split(': ')[1].split(',')
        if 'Duración' in dato:  # duration
            movie.duration = dato.split(': ')[1]

    # One accordion panel per entry; the panel title element is kept in `room`.
    # Presumably each panel is a cinema/room - confirm against the page.
    rooms = driver.find_elements_by_css_selector('.accordion > div.card.panel')
    for r in rooms:
        room = r.find_element_by_css_selector('h2.panel-title')
        types = r.find_elements_by_css_selector(
            '.movie-showtimes-component-combination')
        for t in types:
            # The label's <small> HTML is split on the bullet character and
            # each part is stripped of surrounding whitespace.
            type_data = list(
                map(
                    lambda x: x.strip(),
                    t.find_element_by_css_selector(
                        '.movie-showtimes-component-label small').
                    get_attribute('innerHTML').split('•')))
            # The third part says whether the showing is subtitled
            # ('Subtitulado'). Fewer than three parts raises IndexError.
            subtitled = 'Subtitulado' in type_data[2]
            # Display name: panel title text followed by the first two label
            # parts. NOTE(review): their meaning (format/language?) is not
            # visible here - confirm.
            room_with_type = room.text + ' ' + \
                type_data[0] + ' ' + type_data[1]
# Movie field name -> HTML attribute on the tag that supplies its value.
# The order matches the sequence in which the fields are populated.
_DATA_ATTRIBUTE_FIELDS = (
    ('actors', 'data-actors'),      # leading actors
    ('director', 'data-director'),  # director
    ('duration', 'data-duration'),  # running time
    ('rate', 'data-rate'),          # Douban rating
    ('region', 'data-region'),      # release region
    ('release', 'data-release'),    # release date
    ('title', 'data-title'),        # movie title
    ('trailer', 'data-trailer'),    # URL of the related video
)

movies = []
for tag in tags:
    movie = Movie()

    # Copy every data-* attribute onto the matching Movie field. A missing
    # attribute raises KeyError, exactly like a direct attrs[...] lookup.
    for _field, _attribute in _DATA_ATTRIBUTE_FIELDS:
        setattr(movie, _field, tag.attrs[_attribute])

    # The cover image comes from the nested <img>, the Douban page URL from
    # the nested <a>.
    movie.cover = tag.img.attrs["src"]
    movie.threadUrl = tag.a.attrs["href"]
    # NOTE(review): nothing in the visible span adds `movie` to `movies`;
    # presumably that happens in the code that follows - confirm.