Exemple #1
0
def main():
    """Run the crawl and persist the movies only when it is complete.

    The result of ``spider()`` is saved through ``Movie.save_all`` when it
    contains exactly 100 entries; otherwise nothing is stored and a message
    reporting missing pages is printed.
    """
    movies = spider()
    crawl_complete = len(movies) == 100
    if not crawl_complete:
        print('爬取页面有遗漏')
        return
    Movie.save_all(movies)
    print('成功爬取所有页面并存入数据库')
Exemple #2
0
    def setUp(self):
        """Push an app context, create the tables and seed two movies.

        The two seeds use both calling conventions of ``Movie.new``:
        keyword arguments for ``m1`` and a single dict for ``m2``.
        """
        self.ctx = app.app_context()
        self.ctx.push()
        db.create_all()

        second_form = dict(title='t2', year='2019')
        self.m1 = Movie.new(title='t1', year='2019')
        self.m2 = Movie.new(second_form)
Exemple #3
0
def _resolve_movie(sapo_id, sapo_title, sapo_description):
    """Resolve movie based on id, title and description.

    Lookup order:
      1. the alias registered under ``sapo_id``; returned as-is when found;
      2. otherwise, every movie with the same title plus every title alias
         not already covered by those movies is a candidate. A candidate
         is returned when its own description is similar to
         ``sapo_description`` (SequenceMatcher ratio > 0.5), or when one of
         the records returned by ``ms.get_alias_movie_by_aliasof`` for it
         matches on title and has a similar description.

    Returns the matching object, or ``None`` when nothing matches.
    """
    id_alias = Movie.from_pymongo(
        ms.get_movie_alias_by_id(sapo_id))  # Movie alias based on id
    if id_alias is not None:
        return id_alias  # Match found based on id

    same_titles = Movie.from_pymongo(
        ms.get_movie_in_db_by_title(sapo_title))  # Search by title
    title_aliases = Movie.from_pymongo(
        ms.get_movie_alias_by_title(sapo_title))  # Search by title aliases

    # Built once: the original rebuilt this collection for every alias.
    known_ids = {movie.sapo_id for movie in same_titles}
    alias_candidates = same_titles + [
        alias for alias in title_aliases if alias.sapo_id not in known_ids
    ]

    for alias_candidate in alias_candidates:
        if SequenceMatcher(None, alias_candidate.sapo_description,
                           sapo_description).ratio() > 0.5:
            return alias_candidate  # Match found based on title

        for alias_of in ms.get_alias_movie_by_aliasof(
                alias_candidate.sapo_id):
            title_matches = (
                alias_of['sapo_title'] == sapo_title
                or alias_of['sapo_title'] in alias_candidate.alias_titles)
            if title_matches and SequenceMatcher(
                    None, alias_of['sapo_description'],
                    sapo_description).ratio() > 0.5:
                return alias_candidate  # Match found based on alias

    return None
Exemple #4
0
def test_set_awards_attributes_no_awards_info(movie_list):
    """With no awards info, every award counter on the movie must be zero."""
    movie = movie_list[0]
    Movie.set_awards_attributes(None, movie)

    counters = (
        'oscars_won',
        'oscar_nominations',
        'awards_won',
        'award_nominations',
    )
    for counter in counters:
        assert getattr(movie, counter) == 0
Exemple #5
0
    def get_list(self, page=1, category_code="ALL"):
        """Fetch one listing page and save the movies not yet known.

        Each anchor selected from the page is parsed with ``self.parse``
        and wrapped in a ``Movie``. Movies for which ``is_exist_by_redis()``
        is false are saved and collected. The progress bar is advanced
        after every item, with a third argument of 1 for a saved movie
        and 0 otherwise.

        Returns:
            ``(is_continue, results)``: ``is_continue`` is True only when
            the page held exactly ``Naver.MAX_CNT_OF_PAGE`` items;
            ``results`` lists the newly saved movies.
        """
        response = rq.get(Naver.MOVIES_URL % (category_code, page))
        soup = BeautifulSoup(response.content, 'lxml')
        items = soup.select('.lst_thum_wrap .lst_thum li a')
        total = len(items)
        results = []

        Crawler.progress_bar(total, 0, 0)

        for position, item in enumerate(items, start=1):
            href, product_no, title, body = self.parse(item)
            movie = Movie(href, product_no, title, body, category_code)

            is_new = not movie.is_exist_by_redis()
            if is_new:
                movie.save()
                results.append(movie)

            Crawler.progress_bar(total, position, 1 if is_new else 0)

        return total == Naver.MAX_CNT_OF_PAGE, results
Exemple #6
0
def recommendation():
    """Score the movies related to the current selection.

    Reads ``cursorAbout``, ``userMatrix`` and ``selection`` from the JSON
    request body. For every row returned by ``Movie.related_base`` the
    score is the sum of the ``userMatrix`` values whose key is among the
    names of the row's bases; those keys are listed under ``relations``.

    Returns:
        dict mapping the row index to
        ``{'title', 'score', 'relations', 'data'}``.
    """
    cursor_about = request.json.get("cursorAbout")
    user_matrix = request.json.get("userMatrix", {})
    selection = request.json.get('selection', [])

    selected_ids = [int(uid) for uid in selection]

    scoring = {}
    rows = Movie.related_base(cursor_about, selected_ids)
    for index, row in enumerate(rows):
        movie = Movie.inflate(row[0])
        base_names = [Base.inflate(base).name for base in row[1]]

        matches = [(key, value) for key, value in user_matrix.items()
                   if key in base_names]

        scoring[index] = {
            'title': movie.title,
            'score': sum(value for _, value in matches),
            'relations': [key for key, _ in matches],
            'data': movie.serialize,
        }

    return scoring
Exemple #7
0
def add_new_movie(args):
    """Add new movie to the database.

    Looks the movie up through ``OmdbApiResponse`` using
    ``args.movie_identifier`` (underscores replaced) and ``args.imdb_id``.
    When the API answers positively and no movie with the same IMDb id is
    stored yet, the movie is saved. The outcome is reported on stdout.

    The cursor and the connection are always closed, even when the lookup
    or the save raises; the commit only happens on the success path.
    """
    title_or_imdb_id = replace_underscores(args.movie_identifier)
    try:
        omdb = OmdbApiResponse(title_or_imdb_id, args.imdb_id)
    except URLError:
        print('Unable to receive data from OMDb API. '
              'Check your internet connection.')
        return

    if not omdb.response:
        print(f'Movie: {title_or_imdb_id} not found.')
        return

    cnx = connection(DATABASE)
    try:
        c = cnx.cursor()
        try:
            check_db = Movie.load_by_imdb_id(c, omdb.movie_data['imdbID'])
            if check_db is None:
                movie = Movie.create_object_from_omdb_data(omdb.movie_data)
                m = movie.save(c)
                if m:
                    print(f'Movie: {movie.title} has been successfully saved '
                          f'to the database')
            else:
                print(f'Movie: {omdb.movie_data["Title"]} already in the '
                      f'database')
            cnx.commit()
        finally:
            c.close()
    finally:
        cnx.close()
def add_movies():
    """Add four sample movies to the session and commit them together."""
    samples = (
        ("The Hunger Games: Catching Fire", 7.5),
        ("Wreck-It Ralph", 7.8),
        ("Her", 8.3),
        ("Avengers: Infinity War", 8.8),
    )
    session.add_all(
        [Movie(name=name, rating=rating) for name, rating in samples])
    session.commit()
Exemple #9
0
def _make_movie_object(soup, min_rating=None):
    """Build one dict per movie entry found in a parsed listing page.

    Every ``div`` with class ``lister-item mode-advanced`` is passed to the
    ``_get_movie_*`` helpers; the extracted values are wrapped in a
    ``Movie`` and serialised with ``to_dict()``.

    Args:
        soup: parsed BeautifulSoup document.
        min_rating: accepted for interface compatibility but currently
            not applied. NOTE(review): presumably meant to filter out
            low-rated movies; confirm the type returned by
            ``_get_movie_rating`` before implementing.

    Returns:
        list of the dicts produced by ``Movie.to_dict()``, in page order.
    """
    data = []
    # find_all is the current name; findAll is only kept as a legacy alias.
    div_movies = soup.find_all("div", {"class": "lister-item mode-advanced"})
    for div_movie in div_movies:
        name = _get_movie_name(div_movie)
        year = _get_movie_year(div_movie)
        movie_id = _get_movie_id(div_movie)
        movie_runtime = _get_movie_runtime(div_movie)
        rating = _get_movie_rating(div_movie)
        stars = _get_movie_stars(div_movie)
        directors = _get_movie_directors(div_movie)
        summary = _get_movie_summary(div_movie)
        genre = _get_movie_genre(div_movie)

        movie = Movie(id=movie_id,
                      title=name,
                      runtime=movie_runtime,
                      summary=summary,
                      year=year,
                      rating=rating,
                      stars=stars,
                      directors=directors,
                      genre=genre)

        data.append(movie.to_dict())

    return data
Exemple #10
0
def forge():
    """Generate mock data: one test user and ten movies."""
    username = '******'
    password = '******'

    catalogue = (
        ('My Neighbor Totoro', '1988'),
        ('Dead Poets Society', '1989'),
        ('A Perfect World', '1993'),
        ('Leon', '1994'),
        ('Mahjong', '1996'),
        ('Swallowtail Butterfly', '1996'),
        ('King of Comedy', '1999'),
        ('Devils on the Doorstep', '1999'),
        ('WALL-E', '2008'),
        ('The Pork of Music', '2012'),
    )

    # Create a test user.
    user = User.new(username=username)
    user.set_hash_password(password)

    # Each movie is created from a fresh form-style dict.
    for title, year in catalogue:
        Movie.new({'title': title, 'year': year})
    click.echo('Mock data generated.')
Exemple #11
0
 def test_update_by_id(self):
     """``Movie.update_by_id`` accepts keyword arguments as well as a dict."""
     first_id = Movie.query.filter_by(title='t1').first().id
     second_id = Movie.query.filter_by(title='t2').first().id

     # Keyword-argument form.
     Movie.update_by_id(first_id, year='2018')
     updated_first = Movie.query.filter_by(title='t1').first()
     self.assertEqual('2018', updated_first.year)

     # Dict form.
     Movie.update_by_id(second_id, {'title': 'T2'})
     updated_second = Movie.query.filter_by(year='2019').first()
     self.assertEqual('T2', updated_second.title)
Exemple #12
0
def add_movie():
    """Show the add-movie form on GET; create the movie on POST.

    On POST a ``Movie`` is built from the submitted form fields, created
    through ``movie.create`` and the client is redirected to ``/movies``.
    Any other HTTP method yields ``None``.
    """
    method = request.method
    if method == 'GET':
        return render_template('add_movie.html', genres=genres)
    if method != 'POST':
        return None

    form = request.form
    field_names = ('name', 'genre', 'release_year', 'duration',
                   'description', 'rating', 'director_name')
    movie = Movie(None, *[form[field] for field in field_names])

    movie.create(movie)

    return redirect('/movies')
Exemple #13
0
 def load_movies(self, file_path):
     """Load movies from a comma-separated file into ``self._movies``.

     The first row is treated as a header and skipped. Every other row
     must hold the integer movie id in column 0 and the title in column 1.
     Movies are stored under the key returned by ``movie.get_id()``.

     Loading is best-effort: on any error ``self._movies`` is reset to an
     empty dict and the problem is printed instead of raised.
     """
     try:
         # newline='' is what the csv module expects; without it, quoted
         # fields containing line breaks are not parsed correctly.
         with open(file_path, 'r', newline='') as csv_file:
             reader = csv.reader(csv_file, delimiter=',')
             # Skip header.
             next(reader, None)
             for row in reader:
                 movie = Movie(movie_id=int(row[0]), title=row[1])
                 self._movies[movie.get_id()] = movie
     except Exception as e:
         self._movies = {}
         print("Could not load movies.", e)
Exemple #14
0
def delete_movie():
    """
    Delete movie by id

    Responses:
        200 with a confirmation message when the record was deleted;
        400 'No id specified' when the request carries no ``id``;
        400 'Id must be integer' when ``id`` cannot be converted to int;
        400 'Record with such id does not exist' when the delete fails.
    """
    data = get_request_data()
    # A request without an id used to fall through to the success response.
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
    except (TypeError, ValueError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    try:
        Movie.delete(row_id)
    except Exception:
        # Still answered with 400 as before, but no longer blamed on the
        # id format.
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    msg = 'Record successfully deleted'
    return make_response(jsonify(message=msg), 200)
Exemple #15
0
def get_candidates(movie):
    """Getting movie candidates from Google search.

    Searches for ``"<sapo_title> imdb"`` and keeps the results served from
    ``www.imdb.com`` whose metatags describe an IMDb page that is not a TV
    series, video game or trailer. Each page id is considered once. A
    candidate is kept only when ``complete_movie_with_omdb`` returns a
    truthy value for it.

    Args:
        movie: object exposing ``sapo_id``, ``sapo_title`` and
            ``sapo_description``; these are copied onto each candidate.

    Returns:
        list of ``Movie`` candidates; empty when the search has no usable
        result.
    """
    query = movie.sapo_title + ' imdb'
    params = {'key': CONFIG.GOOGLE_KEY, 'cx': CONFIG.GOOGLE_CX, 'q': query}
    url = CONFIG.GOOGLE_ENDPOINT + '?' + urllib.parse.urlencode(params)
    # The URL embeds the API key, so it is deliberately not printed.
    with urllib.request.urlopen(url) as reply:
        response = json.loads(reply.read().decode('utf-8'))

    excluded_markers = ('(TV Series', '(Video Game', 'Official Trailer')
    candidates = []

    # A search without results may carry no 'items' key at all.
    for item in response.get('items', []):
        if 'pagemap' not in item or 'metatags' not in item['pagemap']:
            continue
        if item.get('displayLink') != 'www.imdb.com':
            continue

        for metatag in item['pagemap']['metatags']:
            og_title = metatag.get('og:title')
            page_id = metatag.get('pageid')
            if (metatag.get('og:site_name') != 'IMDb'
                    or og_title is None or page_id is None):
                continue
            if any(marker in og_title for marker in excluded_markers):
                continue
            if any(c.imdb_id == page_id for c in candidates):
                continue  # This page is already a candidate.

            candidate = Movie()
            candidate.sapo_id = movie.sapo_id
            candidate.sapo_title = movie.sapo_title
            candidate.sapo_description = movie.sapo_description
            candidate.imdb_id = page_id
            candidate.imdb_title = og_title
            # A missing description used to abort the whole search with a
            # KeyError; it now defaults to an empty string.
            candidate.imdb_description = metatag.get('og:description', '')

            if complete_movie_with_omdb(
                    candidate
            ):  # Adding further attributes to the movie object
                candidates.append(candidate)

    return candidates
def add_movie():
    """Let a logged-in user add a movie.

    Without a ``token`` in the session the client is redirected to
    ``/login``. GET renders the form. POST stores a ``Movie`` owned by the
    user whose username equals the session token and answers ``"OKE"``.
    Any other HTTP method yields ``None``.
    """
    if "token" not in session:
        return redirect("/login")

    method = request.method
    if method == "GET":
        return render_template("add_movie.html")
    if method != "POST":
        return None

    submitted = request.form
    title, image, year = (submitted["title"], submitted["image"],
                          submitted["year"])
    owner = User.objects(username=session["token"]).first()
    Movie(title=title, image=image, year=year, user=owner).save()
    return "OKE"
    def all_movies(self):
        """Return every row of the ``movies`` table in ``Movie.json()`` form.

        Each record's first six columns are passed positionally to
        ``Movie``; the fourth one is converted to ``float`` first.

        Returns:
            list with one ``movie.json()`` result per row.
        """
        sql = "SELECT * FROM movies"
        cursor = connection.cursor()
        try:
            cursor.execute(sql)
            records = cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()

        return [
            Movie(record[0], record[1], record[2], float(record[3]),
                  record[4], record[5]).json()
            for record in records
        ]
Exemple #18
0
    def get(self, movie_name):
        """Return the serialised movie called ``movie_name``, or a 404 body."""
        movie = Movie.get_movie_by_name(movie_name)
        if not movie:
            return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

        return movie_schema.dump(movie), HTTPStatus.OK
Exemple #19
0
    def post(self):
        """Create a movie from the JSON request body.

        Answers 400 when a movie with the same name already exists;
        otherwise loads the body through ``movie_schema``, stores the new
        movie and returns its serialised form with 201.
        """
        payload = request.get_json()

        if Movie.get_movie_by_name(payload['name']):
            return {'message': 'movie already exist'}, HTTPStatus.BAD_REQUEST

        created = Movie(**movie_schema.load(payload))
        db.session.add(created)
        db.session.commit()

        return movie_schema.dump(created), HTTPStatus.CREATED
Exemple #20
0
def update_movie():
    """
    Update movie record by id

    Responses:
        200 with the updated record restricted to ``MOVIE_FIELDS``;
        400 'No id specified' when the request carries no ``id``;
        400 'Id must be integer' when ``id`` cannot be converted to int;
        400 'more then need' when a key is not listed in ``MOVIE_FIELDS``;
        400 'Wrong input' when the update itself fails.
    """
    data = get_request_data()

    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
    except (TypeError, ValueError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    if any(k not in MOVIE_FIELDS for k in data):
        err = 'more then need'
        return make_response(jsonify(error=err), 400)

    try:
        upd_record = Movie.update(row_id, **data)
        upd_movie = {
            k: v
            for k, v in upd_record.__dict__.items() if k in MOVIE_FIELDS
        }
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        err = 'Wrong input'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(upd_movie), 200)
Exemple #21
0
def movie_add_relation():
    """
    Add actor to movie's cast

    Expects ``id`` (movie) and ``relation_id`` (actor) in the request data.

    Responses:
        200 with the movie restricted to ``MOVIE_FIELDS`` plus ``cast``;
        400 'No id specified' when the request carries no ``id``;
        400 'Id must be integer' when either id is missing or not an int;
        400 'Record with such id does not exist' when adding the relation
        fails.
    """
    data = get_request_data()
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        row_id = int(data['id'])
        relation_id = int(data['relation_id'])
    except (KeyError, TypeError, ValueError):
        # KeyError: 'relation_id' missing, reported with the same message
        # as before.
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    obj = Actor.query.filter_by(id=relation_id).first()
    try:
        movie = Movie.add_relation(row_id, obj)
        rel_movie = {
            k: v
            for k, v in movie.__dict__.items() if k in MOVIE_FIELDS
        }
        rel_movie['cast'] = str(movie.cast)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    return make_response(jsonify(rel_movie), 200)
Exemple #22
0
    def setUp(self):
        """Prepare the app context, schema, two users, one movie and clients."""
        # Push an application context.
        self.ctx = app.app_context()
        self.ctx.push()
        # Create the database tables by hand.
        db.create_all()

        # (username, password) for the regular user, then the admin.
        accounts = (
            ('******', 'test1'),
            ('******', 'admin'),
        )
        for username, password in accounts:
            User.new(username=username).set_hash_password(password)

        Movie.new(title='Test Movie Title', year='2019')

        self.client = app.test_client()
        self.runner = app.test_cli_runner()
Exemple #23
0
    def put(self, movie_id):
        """Overwrite every editable field of a movie with the JSON body.

        Answers 404 when the movie does not exist and 401 when the current
        user is not an admin. All seven fields must be present in the body.
        """
        payload = request.get_json()

        movie = Movie.get_by_id(movie_id=movie_id)
        if movie is None:
            return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

        requester = User.get_by_id(get_jwt_identity())
        if not requester.is_admin:
            return {'message': 'Not authorized'}, HTTPStatus.UNAUTHORIZED

        editable_fields = ('name', 'year', 'rating', 'description',
                           'director', 'duration', 'age_rating')
        for field in editable_fields:
            setattr(movie, field, payload[field])

        movie.save()

        return movie.data(), HTTPStatus.OK
Exemple #24
0
async def read_many(limit: int = 50, skip: int = 0):
    """Return up to ``limit`` movies from the "movies" collection.

    ``skip`` documents are skipped first; every remaining document is
    expanded into a ``Movie`` through keyword arguments.
    """
    cursor = get_collection("movies").find({}, limit=limit, skip=skip)
    movies: List[Movie] = [Movie(**document) async for document in cursor]
    return movies
Exemple #25
0
    def insert_from_args(self, args):
        """Insert a movie together with its cast and director links.

        Args:
            args: mapping with ``release_year``, ``title``,
                ``wikipedia_link``, ``plot``, ``origin`` and ``genre``
                (each with an ``id``), and the lists ``cast`` and
                ``directors`` whose entries each have an ``id``.

        Returns:
            the ``Movie`` object that was passed to ``self.insert``.
        """
        movie = Movie(args['release_year'], args['title'],
                      args['wikipedia_link'], args['plot'],
                      args['origin']['id'], args['genre']['id'])

        movie_id = self.insert(movie)

        cast_member_service = MovieCastMemberService(self.db)
        director_service = MovieDirectorService(self.db)

        cast = args['cast']
        if len(cast) > 0:
            cast_member_service.insert_many(
                [MovieCastMember(movie_id, member['id']) for member in cast])

        directors = args['directors']
        if len(directors) > 0:
            director_service.insert_many(
                [MovieDirector(movie_id, director['id'])
                 for director in directors])

        return movie
def movie_clear_relations():
    """
    Clear all relations by id

    Responses:
        200 with the movie restricted to ``MOVIE_FIELDS`` plus ``cast``;
        400 'No id specified' when the request carries no ``id``;
        400 'Id must be integer' when ``id`` cannot be converted to int;
        400 'Record with such id does not exist' when no usable record
        comes back from ``Movie.clear_relations``.
    """
    data = get_request_data()
    if 'id' not in data:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)

    try:
        movie_id = int(data['id'])
    except (TypeError, ValueError):
        err = 'Id must be integer'
        return make_response(jsonify(error=err), 400)

    movie = Movie.clear_relations(movie_id)
    try:
        rel_movie = {
            k: v
            for k, v in movie.__dict__.items() if k in MOVIE_FIELDS
        }
    except AttributeError:
        # Raised when the returned value has no __dict__, e.g. None.
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)

    rel_movie['cast'] = str(movie.cast)
    return make_response(jsonify(rel_movie), 200)
Exemple #27
0
    def put(self, movie_name):
        """Return the serialised movie called ``movie_name``, or a 404 body.

        NOTE(review): the request body is parsed but never applied to the
        movie, so this handler does not update anything. Confirm whether
        an update was intended.
        """
        request.get_json()  # Body is parsed; its content is not used.
        mov = Movie.get_movie_by_name(movie_name)
        if not mov:
            return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

        return movie_schema.dump(mov), HTTPStatus.OK
    def __parse_and_insert_movie(self, row, director_ids: [], genre_id: uuid,
                                 origin_id: uuid, cast_ids: []) -> uuid:
        """Insert the movie described by ``row`` and link directors and cast.

        Titles longer than 120 characters are cut to their first 117
        characters followed by ``...``. Director and cast links are only
        inserted when the corresponding id list is non-empty.

        Returns:
            the value returned by ``self.movie_service.insert``.
        """
        title = row['Title']
        if len(title) > 120:
            title = f'{title[:117]}...'

        movie = Movie(row['Release Year'], title, row['Wiki Page'],
                      row['Plot'], origin_id, genre_id)
        movie_id = self.movie_service.insert(movie)

        if len(director_ids) > 0:
            director_service = self.movie_director_service
            director_links = director_service.get_list_from_movie_and_director(
                movie_id, director_ids)
            director_service.insert_many(director_links)

        if len(cast_ids) > 0:
            cast_service = self.movie_cast_member_service
            cast_links = cast_service.get_list_from_movie_and_cast(
                movie_id, cast_ids)
            cast_service.insert_many(cast_links)

        return movie_id
Exemple #29
0
    def put(self, movie_id):
        """Append the ``rating`` from the JSON body to a movie's ratings.

        Answers 404 when the movie does not exist and 401 when the current
        user is not an admin. All other fields of the movie are left
        untouched.
        """
        json_data = request.get_json()

        movie = Movie.get_by_id(movie_id=movie_id)

        if movie is None:
            return {'message': 'movie not found'}, HTTPStatus.NOT_FOUND

        identity = get_jwt_identity()
        current_user = User.get_by_id(identity)

        if not current_user.is_admin:
            return {'message': 'Not authorized'}, HTTPStatus.UNAUTHORIZED

        # Assign a new list instead of mutating the stored one in place.
        # NOTE(review): presumably the model layer only detects a change
        # when the attribute receives a different object - confirm.
        movie.rating = list(movie.rating) + [json_data['rating']]

        movie.save()

        return movie.data(), HTTPStatus.OK
Exemple #30
0
def add_movie():
    """
    Add new movie

    Requires ``name``, ``year`` and ``genre`` in the request data. ``year``
    must be a four-digit string and ``genre`` purely alphabetic.

    Responses:
        200 with the new record restricted to ``MOVIE_FIELDS``;
        400 'No <field> specified' for the first missing required field;
        400 'ERROR' when validation or creation fails;
        400 'Record with such id does not exist' when the created record
        cannot be serialised.
    """
    data = get_request_data()

    # Checked in the original order: name, year, genre.
    for field in ('name', 'year', 'genre'):
        if field not in data:
            err = f'No {field} specified'
            return make_response(jsonify(error=err), 400)

    try:
        year, genre = data['year'], data['genre']
        if not (year.isdigit() and len(year) == 4 and genre.isalpha()):
            return make_response(jsonify(error='ERROR'), 400)

        new_record = Movie.create(**data)
        try:
            new_movie = {
                k: v
                for k, v in new_record.__dict__.items() if k in MOVIE_FIELDS
            }
        except Exception:
            err = 'Record with such id does not exist'
            return make_response(jsonify(error=err), 400)
        return make_response(jsonify(new_movie), 200)
    except Exception:
        # Covers non-string year/genre values and failures in Movie.create.
        return make_response(jsonify(error='ERROR'), 400)
    def parse_movie(self, movie):
        """Extract one movie from its page and add it to ``self.parsed_movies``.

        Genres, languages and origins are read as ``", "``-separated text.
        ``released`` is True when at least one show was found. ``trailer``
        is the ``src`` attribute of the ``.embed-responsive-item`` element,
        or ``None`` when the page has no such element.

        Args:
            movie: parsed page element; assumed to be a BeautifulSoup tag
                (it must offer ``select_one``).
        """
        title = self.get_text(movie, ".page-title")
        genres = self.get_text(movie, "#ctl00_cph_lblGenero").split(", ")
        languages = self.get_text(movie, "#ctl00_cph_lblIdioma").split(", ")
        origins = self.get_text(movie, "#ctl00_cph_lblPaisOrigen").split(", ")
        duration = self.get_duration(movie, "#ctl00_cph_lblDuracion")
        directors = self.get_directors(movie, "#ctl00_cph_lblDirector")
        actors = self.get_actors(movie, "#ctl00_cph_lblActores")
        rated = self.get_text(movie, "#ctl00_cph_lblCalificacion")
        synopsis = self.get_text(movie, "#ctl00_cph_lblSinopsis")

        # select_one returns None when nothing matches; a page without a
        # trailer must not abort parsing.
        trailer_tag = movie.select_one(".embed-responsive-item")
        trailer = None
        if trailer_tag is not None:
            trailer = trailer_tag.attrs.get("src")

        shows = list(self.get_shows(movie))
        released = len(shows) != 0

        parsed = Movie(title=title,
                       genres=genres,
                       languages=languages,
                       origins=origins,
                       duration=duration,
                       directors=directors,
                       rated=rated,
                       actors=actors,
                       synopsis=synopsis,
                       trailer=trailer,
                       shows=shows,
                       released=released)

        self.parsed_movies.append(parsed)
Exemple #32
0
	def extract(self, imdb_id, soup):
		"""Fill a ``Movie`` with the data found in an IMDb title page.

		Reads the title, content rating, release date, genres, duration
		and IMDb rating from the element whose id is ``overview-top``.
		The review links are only set when a ``star-box`` is present, as
		in the original code.

		Args:
			imdb_id: IMDb title id, appended to the title URL.
			soup: parsed BeautifulSoup document of the title page.

		Returns:
			the ``Movie``; it only carries ``imdb_id`` and ``url`` when
			the page has no overview element.
		"""
		url = "http://www.imdb.com/title/" + imdb_id

		movie = Movie(imdb_id=imdb_id, url=url)

		# find(id=...) compares the raw attribute value, so the id must not
		# carry a CSS-style '#' prefix.
		overview = soup.find(id="overview-top")
		if overview is None:
			return movie

		title = overview.find_all("h1", class_="header")
		infobar = overview.find_all("div", class_="infobar")
		starbox = overview.find_all("div", class_="star-box")

		if len(title) > 0:
			# find_all returns a list; search inside its first element.
			spans = title[0].find_all("span")
			if len(spans) == 2:
				movie.title = spans[0].get_text()

		if len(infobar) > 0:
			infobar = infobar[0]
			metas = infobar.find_all("meta")
			spans = infobar.find_all("span")
			times = infobar.find_all("time")

			for meta in metas:
				itemprop = meta.get("itemprop")

				if itemprop == "contentRating":
					movie.rating = meta.get("content")

				elif itemprop == "datePublished":
					movie.release_date = meta.get("content")

			for span in spans:
				if span.get("itemprop") == "genre":
					# The genre text lives in the span itself.
					movie.genres.append(span.get_text())

			for time_tag in times:
				if time_tag.get("itemprop") == "duration":
					movie.duration = time_tag.get("datetime")

		if len(starbox) > 0:
			starbox = starbox[0]

			gigastar = starbox.find_all("div", class_="star-box-giga-star")

			if len(gigastar) > 0:
				movie.imdb_rating = gigastar[0].get_text()

			movie.review_links["imdb"] = url + "/" + "reviews"
			movie.review_links["external"] = url + "/" + "externalreviews"
			movie.review_links["critic"] = url + "/criticreviews"

		return movie