def main():
    """Run the spider and persist its result only when the crawl is complete.

    The list returned by ``spider()`` is handed to ``Movie.save_all`` only
    when it holds exactly 100 entries; otherwise nothing is saved and a
    warning is printed instead.
    """
    crawled = spider()
    if len(crawled) != 100:
        print('爬取页面有遗漏')
        return
    Movie.save_all(crawled)
    print('成功爬取所有页面并存入数据库')
def setUp(self):
    """Push an application context, create the tables and seed two movies.

    ``Movie.new`` is invoked once with keyword arguments and once with a
    single dict, and the results are kept as ``self.m1`` and ``self.m2``.
    """
    self.ctx = app.app_context()
    self.ctx.push()
    db.create_all()

    first_fields = {'title': 't1', 'year': '2019'}
    second_form = {'title': 't2', 'year': '2019'}
    self.m1 = Movie.new(**first_fields)   # keyword-argument call style
    self.m2 = Movie.new(second_form)      # single-dict call style
def _resolve_movie(sapo_id, sapo_title, sapo_description):
    """Resolve movie based on id, title and description.

    Resolution order:

    1. An alias found by ``sapo_id`` is returned as-is.
    2. Otherwise candidates are gathered from movies with the same title,
       followed by title aliases whose ``sapo_id`` is not already among
       them. A candidate is returned when its description is similar enough
       to ``sapo_description``, or when one of the alias records pointing at
       it has a matching title and a similar enough description.
    3. ``None`` when nothing matches.

    "Similar enough" means a ``SequenceMatcher`` ratio strictly above 0.5.
    """
    min_ratio = 0.5

    id_alias = Movie.from_pymongo(
        ms.get_movie_alias_by_id(sapo_id))  # Movie alias based on id
    if id_alias is not None:
        return id_alias  # Match found based on id

    same_titles = Movie.from_pymongo(
        ms.get_movie_in_db_by_title(sapo_title))  # Search by title
    title_aliases = Movie.from_pymongo(
        ms.get_movie_alias_by_title(sapo_title))  # Search by title aliases

    # Build the id lookup once instead of once per alias.
    known_ids = {found.sapo_id for found in same_titles}
    alias_candidates = same_titles + [
        alias for alias in title_aliases if alias.sapo_id not in known_ids]

    for alias_candidate in alias_candidates:
        if SequenceMatcher(None, alias_candidate.sapo_description,
                           sapo_description).ratio() > min_ratio:
            return alias_candidate  # Match found based on title
        for alias_of in ms.get_alias_movie_by_aliasof(
                alias_candidate.sapo_id):
            title_matches = (
                alias_of['sapo_title'] == sapo_title
                or alias_of['sapo_title'] in alias_candidate.alias_titles)
            if title_matches and SequenceMatcher(
                    None, alias_of['sapo_description'],
                    sapo_description).ratio() > min_ratio:
                return alias_candidate  # Match found based on alias
    return None
def test_set_awards_attributes_no_awards_info(movie_list):
    """Passing ``None`` as awards info must leave every award counter at 0."""
    movie = movie_list[0]
    Movie.set_awards_attributes(None, movie)

    counters = ('oscars_won', 'oscar_nominations',
                'awards_won', 'award_nominations')
    for counter in counters:
        assert getattr(movie, counter) == 0
def get_list(self, page=1, category_code="ALL"):
    """Fetch one listing page and save every movie not already known.

    Returns a ``(is_continue, results)`` tuple: ``is_continue`` is true only
    when the page held exactly ``Naver.MAX_CNT_OF_PAGE`` items, and
    ``results`` lists the movies that were saved during this call.
    """
    response = rq.get(Naver.MOVIES_URL % (category_code, page))
    links = BeautifulSoup(response.content, 'lxml').select(
        '.lst_thum_wrap .lst_thum li a')
    total = len(links)
    saved = []

    Crawler.progress_bar(total, 0, 0)
    for position, link in enumerate(links, start=1):
        href, product_no, title, body = self.parse(link)
        movie = Movie(href, product_no, title, body, category_code)
        is_new = not movie.is_exist_by_redis()
        if is_new:
            movie.save()
            saved.append(movie)
        # Third argument is 1 only for freshly saved movies, else 0.
        Crawler.progress_bar(total, position, 1 if is_new else 0)

    return total == Naver.MAX_CNT_OF_PAGE, saved
def recommendation():
    """Score related movies against the user's weight matrix.

    Reads ``cursorAbout``, ``userMatrix`` and ``selection`` from the JSON
    body. For every row returned by ``Movie.related_base`` the score is the
    sum of the ``userMatrix`` values whose key is one of the row's base
    names; those keys are listed under ``relations``. The result is a dict
    keyed by row index.
    """
    payload = request.json
    cursor_about = payload.get("cursorAbout")
    user_matrix = payload.get("userMatrix", {})
    selected_ids = [int(uid) for uid in payload.get('selection', [])]

    scoring = {}
    rows = Movie.related_base(cursor_about, selected_ids)
    for index, row in enumerate(rows):
        movie = Movie.inflate(row[0])
        base_names = [Base.inflate(node).name for node in row[1]]
        entry = {
            'title': movie.title,
            'score': 0,
            'relations': [],
            'data': movie.serialize
        }
        matched = [(key, weight) for key, weight in user_matrix.items()
                   if key in base_names]
        entry['relations'].extend(key for key, _ in matched)
        entry['score'] = sum(weight for _, weight in matched)
        scoring[index] = entry
    return scoring
def add_new_movie(args):
    """Add new movie to the database.

    Looks the movie up through ``OmdbApiResponse`` using
    ``args.movie_identifier`` (underscores replaced) and ``args.imdb_id``.
    When the API cannot be reached or reports no result, a message is
    printed and nothing is stored. Otherwise the movie is saved unless a
    record with the same IMDb id already exists.

    The cursor and connection are released even when a database call
    raises, so a failed lookup or save no longer leaks them.
    """
    title_or_imdb_id = replace_underscores(args.movie_identifier)
    try:
        omdb = OmdbApiResponse(title_or_imdb_id, args.imdb_id)
    except URLError:
        print('Unable to receive data from OMDb API. '
              'Check your internet connection.')
        return

    if not omdb.response:
        print(f'Movie: {title_or_imdb_id} not found.')
        return

    cnx = connection(DATABASE)
    try:
        c = cnx.cursor()
        try:
            check_db = Movie.load_by_imdb_id(c, omdb.movie_data['imdbID'])
            if check_db is None:
                movie = Movie.create_object_from_omdb_data(omdb.movie_data)
                if movie.save(c):
                    print(f'Movie: {movie.title} has been successfully saved '
                          f'to the database')
            else:
                print(f'Movie: {omdb.movie_data["Title"]} already in the '
                      f'database')
            # Commit only after every statement succeeded.
            cnx.commit()
        finally:
            c.close()
    finally:
        cnx.close()
def add_movies():
    """Add four sample movies to the session and commit them together."""
    samples = (
        ("The Hunger Games: Catching Fire", 7.5),
        ("Wreck-It Ralph", 7.8),
        ("Her", 8.3),
        ("Avengers: Infinity War", 8.8),
    )
    session.add_all([Movie(name=name, rating=rating)
                     for name, rating in samples])
    session.commit()
def _make_movie_object(soup, min_rating=None):
    """Build one movie dict per ``lister-item mode-advanced`` div in *soup*.

    Each div is passed to the ``_get_movie_*`` helpers, the extracted values
    are wrapped in a ``Movie`` and the result of ``Movie.to_dict()`` is
    collected.

    ``min_rating`` is accepted for interface compatibility but is not used
    by this function.

    Returns a list of dicts, empty when no matching div exists.
    """
    data = []
    # Each div_movie is a bs4.element.Tag.
    for div_movie in soup.find_all("div",
                                   {"class": "lister-item mode-advanced"}):
        name = _get_movie_name(div_movie)
        year = _get_movie_year(div_movie)
        movie_id = _get_movie_id(div_movie)
        movie_runtime = _get_movie_runtime(div_movie)
        rating = _get_movie_rating(div_movie)
        stars = _get_movie_stars(div_movie)
        directors = _get_movie_directors(div_movie)
        summary = _get_movie_summary(div_movie)
        genre = _get_movie_genre(div_movie)
        movie = Movie(id=movie_id, title=name, runtime=movie_runtime,
                      summary=summary, year=year, rating=rating,
                      stars=stars, directors=directors, genre=genre)
        data.append(movie.to_dict())
    return data
def forge():
    """Generate mock data: one test user and ten movies."""
    username = '******'
    password = '******'
    catalogue = (
        ('My Neighbor Totoro', '1988'),
        ('Dead Poets Society', '1989'),
        ('A Perfect World', '1993'),
        ('Leon', '1994'),
        ('Mahjong', '1996'),
        ('Swallowtail Butterfly', '1996'),
        ('King of Comedy', '1999'),
        ('Devils on the Doorstep', '1999'),
        ('WALL-E', '2008'),
        ('The Pork of Music', '2012'),
    )

    # Create a test user
    tester = User.new(username=username)
    tester.set_hash_password(password)

    for title, year in catalogue:
        Movie.new({'title': title, 'year': year})

    click.echo('Mock data generated.')
def test_update_by_id(self):
    """``Movie.update_by_id`` accepts both keyword arguments and a dict."""

    def first_by(**criteria):
        # First row matching the given column filters.
        return Movie.query.filter_by(**criteria).first()

    m1_id = first_by(title='t1').id
    m2_id = first_by(title='t2').id

    Movie.update_by_id(m1_id, year='2018')
    self.assertEqual('2018', first_by(title='t1').year)

    Movie.update_by_id(m2_id, {'title': 'T2'})
    # NOTE(review): presumably only 't2' still has year '2019' at this
    # point -- confirm against the fixture.
    self.assertEqual('T2', first_by(year='2019').title)
def add_movie():
    """Show the add-movie form on GET; create the movie on POST.

    On POST the submitted form fields are passed positionally to ``Movie``
    (with ``None`` as the first argument), the movie is created and the
    client is redirected to ``/movies``. Any other method yields ``None``.
    """
    if request.method == 'GET':
        return render_template('add_movie.html', genres=genres)
    if request.method != 'POST':
        return None

    form = request.form
    field_names = ('name', 'genre', 'release_year', 'duration',
                   'description', 'rating', 'director_name')
    movie = Movie(None, *[form[field] for field in field_names])
    movie.create(movie)
    return redirect('/movies')
def load_movies(self, file_path):
    """Load movies from a comma-separated file into ``self._movies``.

    The first row is treated as a header and skipped. For every other row,
    column 0 is converted to ``int`` and used as the movie id and column 1
    as the title; the movie is stored under ``movie.get_id()``.

    Loading is best-effort: on any error ``self._movies`` is reset to an
    empty dict and a message is printed instead of raising.
    """
    try:
        # The csv module requires newline='' so that newlines embedded in
        # quoted fields are handled by the reader itself.
        with open(file_path, 'r', newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            # Skip header
            next(reader, None)
            for row in reader:
                movie = Movie(movie_id=int(row[0]), title=row[1])
                self._movies[movie.get_id()] = movie
    except Exception as e:
        # Deliberately broad: a partially loaded catalogue is discarded.
        self._movies = {}
        print("Could not load movies.", e)
def delete_movie():
    """
    Delete movie by id

    Responses:
      * 400 ``No id specified`` when the request has no ``id``.
      * 400 ``Id must be integer`` when ``id`` cannot be converted to int.
      * 400 ``Record with such id does not exist`` when ``Movie.delete``
        fails.
      * 200 ``Record successfully deleted`` otherwise.
    """
    data = get_request_data()
    if 'id' not in data:
        # Matches the other id-based endpoints instead of returning None.
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
    except (TypeError, ValueError, OverflowError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    try:
        Movie.delete(row_id)
    except Exception:
        # A failed delete is not an id-format problem; report it as such.
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    msg = 'Record successfully deleted'
    return make_response(jsonify(message=msg), 200)
def get_candidates(movie):
    """Getting movie candidates from Google search.

    Queries the configured search endpoint for ``"<sapo_title> imdb"`` and
    turns every result hosted on ``www.imdb.com`` into a ``Movie``
    candidate. TV series, video games, official trailers and IMDb page ids
    already collected are skipped. A candidate is kept only when
    ``complete_movie_with_omdb`` accepts it.

    Returns a list of candidates, empty when the response carries no
    ``items``.
    """
    query = movie.sapo_title + ' imdb'
    params = {'key': CONFIG.GOOGLE_KEY, 'cx': CONFIG.GOOGLE_CX, 'q': query}
    url = CONFIG.GOOGLE_ENDPOINT + '?' + urllib.parse.urlencode(params)
    # NOTE(review): this prints the full URL, including the API key.
    print(url)

    # Close the HTTP response deterministically.
    with urllib.request.urlopen(url) as http_response:
        response = json.loads(http_response.read().decode('utf-8'))

    excluded_markers = ('(TV Series', '(Video Game', 'Official Trailer')
    candidates = []
    # A response without 'items' is treated as "no results".
    for item in response.get('items', []):
        if not ('pagemap' in item and 'displayLink' in item
                and 'metatags' in item['pagemap']
                and item['displayLink'] == 'www.imdb.com'):
            continue
        for metatag in item['pagemap']['metatags']:
            if metatag.get('og:site_name') != 'IMDb':
                continue
            if 'og:title' not in metatag or 'pageid' not in metatag:
                continue
            if any(marker in metatag['og:title']
                   for marker in excluded_markers):
                continue
            if any(c.imdb_id == metatag['pageid'] for c in candidates):
                continue

            candidate = Movie()
            candidate.sapo_id = movie.sapo_id
            candidate.sapo_title = movie.sapo_title
            candidate.sapo_description = movie.sapo_description
            candidate.imdb_id = metatag['pageid']
            candidate.imdb_title = metatag['og:title']
            # A missing description no longer aborts the whole search.
            candidate.imdb_description = metatag.get('og:description', '')
            # Adding further attributes to the movie object
            if complete_movie_with_omdb(candidate):
                candidates.append(candidate)
    return candidates
def add_movie():
    """Show the add-movie form or store a movie for the logged-in user.

    Requests without ``token`` in the session are redirected to ``/login``.
    GET renders the form. POST saves a movie built from the ``title``,
    ``image`` and ``year`` form fields, owned by the user whose username is
    the session token, and answers ``"OKE"``. Other methods yield ``None``.
    """
    if "token" not in session:
        return redirect("/login")
    if request.method == "GET":
        return render_template("add_movie.html")
    if request.method != "POST":
        return None

    form = request.form
    fields = {key: form[key] for key in ("title", "image", "year")}
    owner = User.objects(username=session["token"]).first()
    Movie(**fields, user=owner).save()
    return "OKE"
def all_movies(self):
    """Return every row of the ``movies`` table as ``Movie.json()`` output.

    Each record's first six columns are passed positionally to ``Movie``;
    the fourth column is converted to ``float``.

    The cursor is closed once the rows are fetched, even if the query
    fails.
    """
    sql = "SELECT * FROM movies"
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
        records = cursor.fetchall()
    finally:
        cursor.close()

    return [
        Movie(record[0], record[1], record[2], float(record[3]),
              record[4], record[5]).json()
        for record in records
    ]
def get(self, movie_name):
    """Return the serialized movie with the given name, or a 404 message."""
    found = Movie.get_movie_by_name(movie_name)
    if not found:
        return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND
    return movie_schema.dump(found), HTTPStatus.OK
def post(self):
    """Create a movie from the JSON request body.

    Returns:
      * 400 when the body is missing/empty or has no ``name``.
      * 400 ``movie already exist`` when a movie with that name exists.
      * 201 with the serialized movie after it has been committed.
    """
    data = request.get_json()
    # Reject a missing body or name up front instead of failing with an
    # unhandled error on data['name'].
    if not data or 'name' not in data:
        return {'message': 'movie name is required'}, HTTPStatus.BAD_REQUEST

    if Movie.get_movie_by_name(data['name']):
        return {'message': 'movie already exist'}, HTTPStatus.BAD_REQUEST

    movie_data = movie_schema.load(data)
    new_movie = Movie(**movie_data)
    db.session.add(new_movie)
    db.session.commit()
    return movie_schema.dump(new_movie), HTTPStatus.CREATED
def update_movie():
    """
    Update movie record by id

    Responses:
      * 400 ``No id specified`` when the request has no ``id``.
      * 400 ``Id must be integer`` when ``id`` cannot be converted to int.
      * 400 ``more then need`` when a key is not listed in MOVIE_FIELDS.
      * 400 ``Wrong input`` when the update itself fails.
      * 200 with the updated record's MOVIE_FIELDS attributes otherwise.
    """
    data = get_request_data()
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
    except (TypeError, ValueError, OverflowError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    if any(k not in MOVIE_FIELDS for k in data):
        err = 'more then need'
        return make_response(jsonify(error=err), 400)

    try:
        upd_record = Movie.update(row_id, **data)
        upd_movie = {
            k: v for k, v in upd_record.__dict__.items()
            if k in MOVIE_FIELDS
        }
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        err = 'Wrong input'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(upd_movie), 200)
def movie_add_relation():
    """
    Add actor to movie's cast

    Responses:
      * 400 ``No id specified`` when the request has no ``id``.
      * 400 ``Id must be integer`` when ``id`` or ``relation_id`` is
        missing or cannot be converted to int.
      * 400 ``Record with such id does not exist`` when adding the relation
        or serializing the movie fails.
      * 200 with the movie's MOVIE_FIELDS attributes plus ``cast``.
    """
    data = get_request_data()
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
        relation_id = int(data['relation_id'])
    except (KeyError, TypeError, ValueError, OverflowError):
        # KeyError keeps a missing relation_id on this branch, as before.
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    obj = Actor.query.filter_by(id=relation_id).first()
    try:
        movie = Movie.add_relation(row_id, obj)
        rel_movie = {
            k: v for k, v in movie.__dict__.items()
            if k in MOVIE_FIELDS
        }
        rel_movie['cast'] = str(movie.cast)
    except Exception:
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(rel_movie), 200)
def setUp(self):
    """Prepare app context, schema, two users, one movie and test clients."""
    # Enter an application context
    self.ctx = app.app_context()
    self.ctx.push()
    # Create the database table schema by hand
    db.create_all()

    accounts = (('******', 'test1'), ('******', 'admin'))
    for username, password in accounts:
        account = User.new(username=username)
        account.set_hash_password(password)

    Movie.new(title='Test Movie Title', year='2019')

    self.client = app.test_client()
    self.runner = app.test_cli_runner()
def put(self, movie_id):
    """Overwrite all editable fields of a movie; admin only.

    Returns 404 when the movie does not exist, 401 when the current user is
    not an admin, otherwise the saved movie's ``data()`` with 200. The JSON
    body must contain every field listed below.
    """
    json_data = request.get_json()
    movie = Movie.get_by_id(movie_id=movie_id)
    if movie is None:
        return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

    current_user = User.get_by_id(get_jwt_identity())
    if not current_user.is_admin:
        return {'message': 'Not authorized'}, HTTPStatus.UNAUTHORIZED

    editable = ('name', 'year', 'rating', 'description',
                'director', 'duration', 'age_rating')
    for field in editable:
        setattr(movie, field, json_data[field])

    movie.save()
    return movie.data(), HTTPStatus.OK
async def read_many(limit: int = 50, skip: int = 0):
    """Return up to *limit* documents of the ``movies`` collection as Movies.

    *skip* documents are skipped first; each document is expanded into
    ``Movie`` keyword arguments.
    """
    cursor = get_collection("movies").find({}, limit=limit, skip=skip)
    return [Movie(**document) async for document in cursor]
def insert_from_args(self, args):
    """Insert a movie described by *args* plus its cast and director links.

    The movie is built from ``release_year``, ``title``, ``wikipedia_link``,
    ``plot`` and the ``id`` of ``origin`` and ``genre``. Link records are
    inserted in bulk for every entry of ``cast`` and ``directors``; empty
    lists are skipped.

    Returns the ``Movie`` object that was inserted.
    """
    movie = Movie(args['release_year'], args['title'],
                  args['wikipedia_link'], args['plot'],
                  args['origin']['id'], args['genre']['id'])
    movie_id = self.insert(movie)

    cast_member_service = MovieCastMemberService(self.db)
    director_service = MovieDirectorService(self.db)

    cast = args['cast']
    if len(cast) > 0:
        cast_member_service.insert_many(
            [MovieCastMember(movie_id, member['id']) for member in cast])

    directors = args['directors']
    if len(directors) > 0:
        director_service.insert_many(
            [MovieDirector(movie_id, director['id'])
             for director in directors])

    return movie
def movie_clear_relations():
    """
    Clear all relations by id

    Responses:
      * 400 ``No id specified`` when the request has no ``id``.
      * 400 ``Id must be integer`` when ``id`` cannot be converted to int.
      * 400 ``Record with such id does not exist`` when clearing the
        relations or serializing the movie fails.
      * 200 with the movie's MOVIE_FIELDS attributes plus ``cast``.
    """
    data = get_request_data()
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        movie_id = int(data['id'])
    except (TypeError, ValueError, OverflowError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    try:
        # The call and the cast lookup are inside the try so that any
        # failure for this id produces the 400 below.
        movie = Movie.clear_relations(movie_id)
        rel_movie = {
            k: v for k, v in movie.__dict__.items()
            if k in MOVIE_FIELDS
        }
        rel_movie['cast'] = str(movie.cast)
    except Exception:
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(rel_movie), 200)
def put(self, movie_name):
    """Return the serialized movie with the given name, or a 404 message."""
    # NOTE(review): the JSON body is parsed but never applied to the movie;
    # confirm whether an update was intended here.
    request.get_json()
    found = Movie.get_movie_by_name(movie_name)
    if not found:
        return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND
    return movie_schema.dump(found), HTTPStatus.OK
def __parse_and_insert_movie(self, row, director_ids: [], genre_id: uuid, origin_id: uuid, cast_ids: []) -> uuid:
    """Insert the movie described by *row* and link directors and cast.

    Titles longer than 120 characters are cut to their first 117 characters
    followed by ``...``. Director and cast link records are inserted only
    when the respective id list is non-empty.

    Returns the id produced by ``self.movie_service.insert``.
    """
    raw_title = row['Title']
    if len(raw_title) <= 120:
        movie_title = raw_title
    else:
        movie_title = f'{raw_title[:117]}...'

    movie = Movie(row['Release Year'], movie_title, row['Wiki Page'],
                  row['Plot'], origin_id, genre_id)
    movie_id = self.movie_service.insert(movie)

    if len(director_ids) > 0:
        director_links = (
            self.movie_director_service.get_list_from_movie_and_director(
                movie_id, director_ids))
        self.movie_director_service.insert_many(director_links)

    if len(cast_ids) > 0:
        cast_links = (
            self.movie_cast_member_service.get_list_from_movie_and_cast(
                movie_id, cast_ids))
        self.movie_cast_member_service.insert_many(cast_links)

    return movie_id
def put(self, movie_id):
    """Append the submitted ``rating`` to a movie's rating list; admin only.

    Returns 404 when the movie does not exist, 401 when the current user is
    not an admin, otherwise the saved movie's ``data()`` with 200. All
    other movie fields are left untouched.
    """
    json_data = request.get_json()
    movie = Movie.get_by_id(movie_id=movie_id)
    if movie is None:
        return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

    identity = get_jwt_identity()
    current_user = User.get_by_id(identity)
    if not current_user.is_admin:
        return {'message': 'Not authorized'}, HTTPStatus.UNAUTHORIZED

    # Assign a NEW list rather than appending in place: re-assigning the
    # same mutated object can go unnoticed by ORM change tracking, in which
    # case the new rating would not be persisted.
    movie.rating = list(movie.rating or []) + [json_data['rating']]

    movie.save()
    return movie.data(), HTTPStatus.OK
def add_movie():
    """
    Add new movie

    Responses:
      * 400 ``No <field> specified`` for the first of ``name``, ``year``,
        ``genre`` that is missing.
      * 400 ``ERROR`` when ``year`` is not a 4-digit string, ``genre`` is
        not alphabetic, or record creation fails.
      * 400 ``Record with such id does not exist`` when the created record
        cannot be serialized.
      * 200 with the new record's MOVIE_FIELDS attributes otherwise.
    """
    data = get_request_data()

    # Checked in this order so the first missing field is the one reported.
    for field in ('name', 'year', 'genre'):
        if field not in data:
            err = f'No {field} specified'
            return make_response(jsonify(error=err), 400)

    try:
        year, genre = data['year'], data['genre']
        if not (year.isdigit() and len(year) == 4 and genre.isalpha()):
            return make_response(jsonify(error='ERROR'), 400)
        new_record = Movie.create(**data)
    except Exception:
        # Covers non-string inputs as well as failures inside Movie.create.
        return make_response(jsonify(error='ERROR'), 400)

    try:
        new_movie = {k: v for k, v in new_record.__dict__.items()
                     if k in MOVIE_FIELDS}
    except Exception:
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(new_movie), 200)
def parse_movie(self, movie):
    """Parse one movie page and append the result to ``self.parsed_movies``.

    Text fields are read through the ``self.get_*`` helpers using the CSS
    selectors below; genres, languages and origins are split on ``", "``.
    ``released`` is true when at least one show was found.

    A page without an ``.embed-responsive-item`` element yields
    ``trailer=None`` instead of raising ``AttributeError``.
    """
    title = self.get_text(movie, ".page-title")
    genres = self.get_text(movie, "#ctl00_cph_lblGenero").split(", ")
    languages = self.get_text(movie, "#ctl00_cph_lblIdioma").split(", ")
    origins = self.get_text(movie, "#ctl00_cph_lblPaisOrigen").split(", ")
    duration = self.get_duration(movie, "#ctl00_cph_lblDuracion")
    directors = self.get_directors(movie, "#ctl00_cph_lblDirector")
    actors = self.get_actors(movie, "#ctl00_cph_lblActores")
    rated = self.get_text(movie, "#ctl00_cph_lblCalificacion")
    synopsis = self.get_text(movie, "#ctl00_cph_lblSinopsis")

    # select_one returns None when nothing matches.
    trailer_tag = movie.select_one(".embed-responsive-item")
    trailer = trailer_tag.attrs.get("src") if trailer_tag is not None else None

    shows = list(self.get_shows(movie))
    released = bool(shows)

    # Separate name so the page argument is not rebound to the result.
    parsed = Movie(title=title, genres=genres, languages=languages,
                   origins=origins, duration=duration, directors=directors,
                   rated=rated, actors=actors, synopsis=synopsis,
                   trailer=trailer, shows=shows, released=released)
    self.parsed_movies.append(parsed)
def extract(self, imdb_id, soup):
    """Build a ``Movie`` for *imdb_id* from its parsed IMDb title page.

    Reads, when present inside the ``overview-top`` element: the title
    (first of exactly two spans in the ``h1.header``), content rating and
    release date (``meta`` itemprops), genres (``span`` itemprops),
    duration (``time`` itemprop) and the IMDb rating (giga-star box).
    Review links are always derived from the title URL.

    Fixes over the previous version: the element is looked up by id
    ``overview-top`` (no ``#``), a missing overview no longer raises, the
    title spans are read from the first header element rather than from
    the result list, the misspelled ``inforbar`` name is gone, and each
    genre is read from its own ``span``.

    Returns the populated ``Movie``.
    """
    url = "http://www.imdb.com/title/" + imdb_id
    movie = Movie(imdb_id=imdb_id, url=url)

    # find(id=...) compares against the raw id attribute, so no '#' prefix.
    overview = soup.find(id="overview-top")
    if overview is not None:
        headers = overview.find_all("h1", class_="header")
        infobars = overview.find_all("div", class_="infobar")
        starboxes = overview.find_all("div", class_="star-box")

        if len(headers) > 0:
            spans = headers[0].find_all("span")
            if len(spans) == 2:
                movie.title = spans[0].get_text()

        if len(infobars) > 0:
            infobar = infobars[0]
            for meta in infobar.find_all("meta"):
                itemprop = meta.get("itemprop")
                if itemprop == "contentRating":
                    movie.rating = meta.get("content")
                elif itemprop == "datePublished":
                    movie.release_date = meta.get("content")
            for span in infobar.find_all("span"):
                if span.get("itemprop") == "genre":
                    movie.genres.append(span.get_text())
            for time in infobar.find_all("time"):
                if time.get("itemprop") == "duration":
                    movie.duration = time.get("datetime")

        if len(starboxes) > 0:
            gigastar = starboxes[0].find_all(
                "div", class_="star-box-giga-star")
            if len(gigastar) > 0:
                movie.imdb_rating = gigastar[0].get_text()

    movie.review_links["imdb"] = url + "/" + "reviews"
    movie.review_links["external"] = url + "/" + "externalreviews"
    movie.review_links["critic"] = url + "/criticreviews"
    return movie