def imdb_search_by_name(request, format, name):
    """AJAX endpoint: search IMDb for people matching *name*.

    Returns a response whose body is ``{'list': [{'id', 'name'}, ...]}``
    serialized in *format* ('json'; 'xml' is a stub). On any lookup failure
    the payload carries ``{'error': True}``. Non-AJAX requests get HTTP 400.
    """
    if not request.is_ajax():
        # If you want to prevent non XHR calls
        return HttpResponse(status=400)
    info = {'list': []}
    try:
        ia = IMDb('httpThin')
        for actor in ia.search_person(name):
            # 'long imdb canonical name' is e.g. "Ford, Harrison"
            info['list'].append({
                'id': actor.personID,  # renamed from 'id' local, which shadowed the builtin
                'name': actor['long imdb canonical name'],
            })
    except Exception:
        # Narrowed from a bare except: best-effort lookup, but no longer
        # swallows SystemExit/KeyboardInterrupt.
        info['error'] = True
    # BUG FIX: the original left `data`/`mimetype` unbound (NameError) when
    # *format* was neither 'xml' nor 'json'; provide explicit defaults.
    mimetype = 'text/plain'
    data = 'Unsupported format'
    if format == 'xml':
        mimetype = 'application/xml'
        # TODO xml serialize
        data = 'Not implemented'
    if format == 'json':
        mimetype = 'application/javascript'
        data = json.dumps(info)
    return HttpResponse(data, mimetype)
def getMissing(idMovieObj):
    """For each {id: movie} pair lacking a cached ./DB/MovieDB/<id>.json file,
    look the title up on IMDb and append "<id> <imdb url>" to ./missing.txt.

    Also stores the found URL on ``movie.imdb_url`` and prints a running count
    of titles that produced no search results.
    """
    ia = IMDb()
    path = "./missing.txt"
    missing_count = 0
    # BUG FIX: use a context manager so the file is closed even if an IMDb
    # call raises (the original leaked the handle on any exception).
    with open(path, 'a') as out:
        for movie_id, movie in idMovieObj.items():
            i = int(movie_id)
            newFile = "./DB/MovieDB/%s.json" % str(i)
            if os.path.exists(newFile):
                continue  # already cached locally
            results = ia.search_movie(movie.title.strip())
            try:
                mv = results[0]  # first (best) search result
            except IndexError:
                missing_count += 1
                print("Missing %s" % missing_count)
                continue
            URL = ia.get_imdbURL(mv)  # URL for first result
            movie.imdb_url = URL
            out.write(str(i) + " " + URL + "\n")
            out.flush()  # keep the file current in case of a later crash
            print(URL)
    print("Missing Done!")
def get_byURL(url, info=None, args=None, kwds=None):
    """Return a Movie, Person, Character or Company object for the given URL;
    info is the info set to retrieve, args and kwds are respectively a list
    and a dictionary or arguments to initialize the data access system.
    Returns None if unable to correctly parse the url; can raise exceptions
    if unable to retrieve the data."""
    if args is None:
        args = []
    if kwds is None:
        kwds = {}
    access = IMDb(*args, **kwds)
    parsed = _re_imdbIDurl.search(url)
    if parsed is None:
        return None
    prefix, imdb_id = parsed.group(1), parsed.group(2)
    # Dispatch on the two-letter IMDb URL prefix.
    fetchers = {
        'tt': access.get_movie,
        'nm': access.get_person,
        'ch': access.get_character,
        'co': access.get_company,
    }
    fetch = fetchers.get(prefix)
    if fetch is None:
        return None
    return fetch(imdb_id, info=info)
def getTitles():
    """Backfill missing imdb_title / netflix_title fields on every Movie row.

    Titles come from IMDb and the Netflix catalog API; when the Netflix call
    fails we back off for 10s and leave that title for a later run.
    """
    APP_NAME = 'rankyourfavs'
    API_KEY = 'smucdewbn2j8rp3rvegmp8y6'
    API_SECRET = 'DvstJpTa7f'
    # (removed unused locals CALLBACK and verbose)
    netflix = NetflixAPIV2(APP_NAME, API_KEY, API_SECRET)
    ia = IMDb()
    count = 0
    for movie in Movie.objects.all():
        print(movie.imdb_title)
        if movie.imdb_title == "":
            imdb_info = ia.get_movie(movie.imdb_id)
            movie.imdb_title = imdb_info['title']
        if movie.netflix_title == "":
            # renamed from 'id', which shadowed the builtin
            title_url = "http://api-public.netflix.com/catalog/titles/movies/" + str(movie.netflix_id)
            try:
                netflix_info = netflix.get_title(title_url)
                movie.netflix_title = netflix_info['catalog_title']['title']['title_short']
            except Exception:
                # Narrowed from a bare except; presumably backing off after a
                # rate-limit/API error — TODO confirm against NetflixAPIV2 errors.
                time.sleep(10)
        movie.save()
        count += 1
def __init__(self, imdbid):
    """Fetch movie metadata for *imdbid* from IMDb, retrying up to 3 times
    with a 20s back-off between attempts.

    Populates self.movie/title/HD/genre/shortdescription/description/date
    and the generated XML tags. NOTE(review): if all three attempts fail the
    attributes stay unset — confirm callers tolerate that.
    """
    print("Fetching info for imdb id " + str(imdbid))
    for _attempt in range(3):
        try:
            imdb = IMDb()
            self.movie = imdb.get_movie(imdbid)
            self.title = self.movie['title']
            self.HD = None
            self.genre = self.movie['genre']
            self.shortdescription = self.movie['plot outline']
            # Prefer the first full plot (text before the '::' author marker),
            # falling back to the plot outline.
            if 'plot' in self.movie.keys():
                self.description = self.movie['plot'][0].split('::')[0]
            else:
                self.description = self.shortdescription
            self.date = str(self.movie['year'])
            # Generate XML tags for Actors/Writers/Directors/Producers
            self.xml = self.xmlTags()
            break
        except Exception:
            # Narrowed from a bare except so Ctrl-C still interrupts the retry loop.
            print(sys.exc_info()[0])
            time.sleep(20)
def get_rating(self, speech, language, regex):
    """Look up the IMDb rating for the movie named in *regex* and speak a
    verdict; English locales only."""
    if language != "en-GB" and language != "en-US":
        return
    MovieTitle = regex.group(regex.lastindex).strip()
    ia = IMDb()
    search_result = ia.search_movie(MovieTitle)
    if not search_result:
        self.say("Sorry, I could not find any information for " + MovieTitle)
        self.complete_request()
        return
    movie_info = search_result[0]
    ia.update(movie_info)
    try:
        MovieRating = movie_info['rating']
        lead = "Rating: " + str(MovieRating) + " out of 10. "
        if MovieRating < 6:
            self.say(lead + "You probably should not see this movie.")
        elif MovieRating < 8:
            self.say(lead + "I recommend you see this movie.")
        elif MovieRating >= 8:
            self.say(lead + "This movie is a must-see!")
        self.complete_request()
    except:
        # 'rating' missing on unrated titles.
        self.say("Sorry. This movie has not yet been rated.")
        self.complete_request()
def check_film(film, actors_ids, out):
    """Fetch the full cast of *film* and put (all-actors-present?, film) on *out*.

    Also records the film's rating on ``film.rate`` as a side effect.
    """
    connection = IMDb()
    full_record = connection.get_movie(film.getID())
    cast = {member.getID() for member in full_record['cast']}
    film.rate = full_record.get('rating')
    out.put((cast.issuperset(actors_ids), film))
def handle_url(bot, user, channel, url, msg):
    """Handle IMDB urls

    When *url* is an IMDb title page, announce its title, rating, vote count,
    genres and Top 250 / Bottom 100 rank on *channel*.
    """
    if not has_imdb:
        return
    m = re.match(r"http://.*?\.imdb\.com/title/tt([0-9]+)/?", url)
    if not m:
        return
    i = IMDb()
    movie = i.get_movie(m.group(1))
    title = movie['long imdb title']
    rating = movie.get('rating', 0.0)
    votes = movie.get('votes', 'no')
    rank = ""
    toprank = movie.get('top 250 rank')
    if toprank:
        rank = "Top 250: #%d" % toprank
    bottomrank = movie.get('bottom 100 rank')
    if bottomrank:
        rank = "Bottom 100: #%d" % bottomrank
    # BUG FIX: movie.get('genres') returns None when IMDb has no genre data,
    # and "/".join(None) raises TypeError; default to an empty list.
    genre = "(" + "/".join(movie.get('genres', [])) + ")"
    msg = "[IMDB] %s - Rating: %.1f (%s votes) %s %s" % (title, rating, votes, genre, rank)
    msg = msg.encode("UTF-8")
    return bot.say(channel, msg)
def get_director(self, speech, language, regex):
    """Speak the IMDb rating for the movie named in *regex* (despite the
    name, this reports the rating, not the director), localized for
    Chinese ("zh-CN") or English.
    """
    MovieTitle = regex.group(regex.lastindex).strip()
    ia = IMDb()
    search_result = ia.search_movie(MovieTitle)
    if not search_result:
        if language == "zh-CN":
            self.say(u"抱歉,我没找到“" + str(MovieTitle.encode("utf-8")) + u"”的信息。")
        else:
            self.say("Sorry, I could not find any information for " + MovieTitle)
        self.complete_request()
        return
    movie_info = search_result[0]
    ia.update(movie_info)
    # BUG FIX: 'rating' is absent for unrated titles; the original raised an
    # unhandled KeyError here (the sibling get_rating handler guards it).
    MovieRating = movie_info.get('rating')
    if MovieRating is None:
        if language == "zh-CN":
            self.say(u"抱歉,这部电影还没有评级。")
        else:
            self.say("Sorry. This movie has not yet been rated.")
    elif MovieRating < 6:
        if language == "zh-CN":
            self.say(u"评级:" + str(MovieRating) + u"。不值一看。")
        else:
            self.say("Rating: " + str(MovieRating) + " out of 10. You probably should not see this movie.")
    elif MovieRating < 8:
        if language == "zh-CN":
            self.say(u"评级:" + str(MovieRating) + u"。还可以,推荐一看。")
        else:
            self.say("Rating: " + str(MovieRating) + " out of 10. I recommend you see this movie.")
    else:
        if language == "zh-CN":
            self.say(u"评级:" + str(MovieRating) + u"。必看好片!")
        else:
            self.say("Rating: " + str(MovieRating) + " out of 10. This movie is a must-see!")
    self.complete_request()
def movie_find_via_imdblib(self, stitle, syear):
    """Search IMDb for *stitle* and return an ImdbFoundMovie whose year is
    within 1 of *syear*; returns None when nothing matches or the search fails.

    Cleaned up: removed debugger scaffolding (`if stitle == u'Spanish Affair 2'`
    breakpoints, no-op `if`/`pass` statements) and narrowed the bare except.
    """
    from imdb import IMDb
    ia = IMDb()
    try:
        movies = ia.search_movie(stitle)
    except Exception:
        return
    for movie in movies:
        kind = movie['kind']
        if kind == u'video game':
            continue
        # Retained from the original: any other unexpected kind is a hard error.
        assert kind == u'movie' or kind == u'tv series' or kind == u'episode' or kind == u'tv mini series'
        found_movie = self.ImdbFoundMovie(
            code=movie.movieID,
            title=movie['title'],
            year=movie['year'],
            search_title=stitle,
            result=self.ImdbFoundMovie.Result.MATCH,
            search_year=syear)
        # Accept the first hit within one year of the requested year.
        if abs(found_movie.get_year_diff()) <= 1:
            return found_movie
def search():
    """JSON endpoint: merge locally-stored series matching *query* with fresh
    IMDb search hits (movies and TV series only), capped at *limit*."""
    query = request.args.get("query")
    limit = int(request.args.get("limit", 10))
    found = OrderedDict()
    if query:
        for known in Series.objects(title__icontains=query):
            found[known.imdb_id] = known
    imdb = IMDb()
    # Over-fetch (2x) because some kinds are filtered out below.
    for hit in imdb.search_movie(query, results=limit * 2):
        if hit.movieID in found:
            continue
        if hit['kind'] not in [MOVIE, TV_SERIES]:
            continue
        model = Movie if hit['kind'] == MOVIE else Series
        record = model(imdb_id=hit.movieID)
        record.year = int(hit.get('year', 0)) or None
        record.title = hit['title']
        found[record.imdb_id] = record
    return jsonify(movies=[item.json for item in found.values()[:limit]])
def watch():
    """Record that a movie (or a specific SxxEyy episode of it) was watched.

    Reads when/where/language/movie/episode from the POSTed form and delegates
    to View.watch / View.watch_episode. Always redirects back to the videos
    index, even when the form data is bad.
    """
    try:
        when = request.form.get('when', datetime.today().strftime('%d.%m.%Y'))
        when = datetime.strptime(when, '%d.%m.%Y')
        where = request.form.get('where') or None
        language = request.form.get('language') or 'ru'
        imdb_id = request.form.get('movie')
        imdb = IMDb()
        movie = imdb.get_movie(imdb_id)
        episode = request.form.get('episode')
        if episode:
            imdb.update(movie, "episodes")
            matches = match('^S([0-9]+)E([0-9]+)$', episode)
            if matches:
                season = int(matches.group(1).lstrip('0'))
                episode = int(matches.group(2).lstrip('0'))
                View.watch_episode(movie, season, episode,
                                   when=when, where=where, language=language)
        else:
            View.watch(movie, when=when, where=where, language=language)
    except Exception:
        # Deliberate best-effort: malformed input just falls through to the
        # redirect. Narrowed from a bare except so Ctrl-C/SystemExit propagate.
        pass
    return redirect(url_for('videos.index'))
def handle(text, mic, profile):
    """Voice-driven IMDb lookup: ask for a title, offer the top five search
    hits one by one, and read out the details of the confirmed match."""
    mic.say('What movie?')
    movie_name = mic.activeListen()
    mic.say('Searching top five results for. %s' % movie_name)
    ia = IMDb()
    movie_query = ia.search_movie(movie_name)
    del movie_query[5:]  # keep only the first five hits
    for movie in movie_query:
        mic.say('Did you mean %s (%s)?' % (movie.get('title'), movie.get('year')))
        response = mic.activeListen()
        if not yes(response):
            continue
        filename = "Movie.CSV"
        ia.update(movie)
        movie_info = '%s (%s). ' % (movie.get('title'), movie.get('year'))
        text = movie_info
        # Append each optional detail section only when IMDb provides it.
        sections = [
            ('rating', lambda v: 'Rating. %s out of 10. ' % v),
            ('runtimes', lambda v: 'Runtime. %s minutes. ' % v[0]),
            ('genres', lambda v: 'Genres. %s. ' % '. '.join(v)),
            ('plot outline', lambda v: 'Plot. %s ' % v),
            ('director', lambda v: 'Directors. %s. ' % format_names(v)),
            ('producer', lambda v: 'Producers. %s. ' % format_names(v)),
            ('cast', lambda v: 'Cast. %s. ' % format_names(v)),
        ]
        for key, render in sections:
            value = movie.get(key)
            if value:
                movie_info += render(value)
        mic.say(movie_info)
        logdata(filename, text)
        return
    mic.say('Unable to find information on the requested movie')
def get_list_of_shows(a_directory, a_show_name, a_season_number, a_file_count):
    """Return ('SUCCESS', [episode titles]) for season *a_season_number* of the
    first TV series matching *a_show_name* on IMDb, or ('ERROR', code).

    *a_directory* and *a_file_count* are accepted for interface compatibility
    but unused here.
    """
    # query IMDb for the show
    db = IMDb()
    results = db.search_movie(a_show_name)
    # check to see if a show was matched
    if 0 == len(results):
        return ('ERROR', rename_result.SHOW_NOT_FOUND_ERROR, )
    # get the first tv show. there could be movies in the list
    series = None
    for item in results:
        if u'tv series' == item['kind']:
            series = item
            break
    # BUG FIX: the original fell through with series == [] and crashed inside
    # db.update() when none of the results was a TV series.
    if series is None:
        return ('ERROR', rename_result.SHOW_NOT_FOUND_ERROR, )
    # get the list of episodes
    db.update(series, 'episodes')
    season = series['episodes'][a_season_number]
    # IMDb numbers episodes within a season starting at 1
    episode_list = [season[i + 1]['title'] for i in range(len(season))]
    return ('SUCCESS', episode_list)
def handle(self, *args, **options): self.bot = options.get("bot") imdb_am = settings.IMDB_ACCESS_SYSTEM if imdb_am == "http": self.imdb = IMDb() elif imdb_am == "sql": self.imdb = IMDb("sql", settings.IMDB_ACCESS_DB_URI) else: raise CommandError("Incorrect configuration of IMDB_ACCESS_SYSTEM property") for f in args: print "Importing...", f data = json.load(open(f)) for link in data: print link #Data checks if "serie" not in link or link["serie"] in self.not_found: warn("Problems looking up serie %s" % link) elif "temp" not in link or "epi" not in link: warn("Season and episode is not setted in %s" % link) elif "lang" not in link: warn(u"Serie %s with link %s has not lang" % (link["serie"], link["links"])) else: try: #Check if temp and epi are ints link["temp"] = int(link["temp"]) link["epi"] = int(link["epi"]) except: warn("Erroneous temp or epi in %s" % link) else: self.process_link(**link) print "List of series not found:" for serie in self.not_found: print "\t%s" % serie
def stats(self):
    """Aggregate collection statistics: top-10 directors/actors by role count,
    how many stored movies sit on IMDb's Top 250 / Bottom 100, plus totals."""
    def _top_people_query(role_type):
        # Top 10 persons by number of roles of the given type.
        return select(
            ['persons.id', func.count('roles.person_id').label('count')],
            from_obj=['persons', 'roles'],
            whereclause="roles.person_id = persons.id AND roles.role_type = '%s'" % role_type,
            group_by=['persons.id'],
            order_by='count desc',
            limit=10)
    top_directors = DBSession.query(Person, 'count').from_statement(_top_people_query('director')).all()
    top_actors = DBSession.query(Person, 'count').from_statement(_top_people_query('cast')).all()
    ia = IMDb()
    top250_ids = [entry.movieID for entry in ia.get_top250_movies()]
    bottom100_ids = [entry.movieID for entry in ia.get_bottom100_movies()]
    return {
        'top250_count': DBSession.query(Movie).filter(Movie.id.in_(top250_ids)).count(),
        'bottom100_count': DBSession.query(Movie).filter(Movie.id.in_(bottom100_ids)).count(),
        'total_count': DBSession.query(Movie).count(),
        'total_runtime': 1,  # placeholder constant, as in the original
        'top_directors': top_directors,
        'top_actors': top_actors,
    }
def airdates():
    """For each requested show, compute the next unwatched episode after the
    client's 'after' (season, episode) position and return its earliest
    release date as JSON: {show_id: {air_en, season, episode} or None}.
    """
    if config is None:
        print 'Config not found!!'
        return '{}'
    # Persistent cache mapping our imdb_id strings to IMDbPY movie ids.
    mmap = MovieMap(config['cache_file'])
    imdb = IMDb('sql', uri = config['imdb_db_url'])
    show_list = json.loads(request.get_data())
    res = {}
    # Fallback for partially-specified release dates during parsing.
    default = datetime(2016, 7, 15)
    try:
        for id in show_list:
            res[id] = None
            rdata = show_list[id]
            # Resolve and cache the IMDbPY movie id on first sight.
            if rdata['imdb_id'] not in mmap.data:
                movie_id = get_movie_id(imdb, rdata['title'], rdata['imdb_id'])
                if movie_id:
                    mmap.data[rdata['imdb_id']] = movie_id
            if rdata['imdb_id'] not in mmap.data:
                print rdata
                continue
            movie = imdb.get_movie(mmap.data[rdata['imdb_id']])
            curr_season = rdata['after'][0]
            curr_episode = rdata['after'][1]
            next_start_season = curr_season
            next_start_episode = curr_episode
            # Advance to the next episode: same season if it exists, otherwise
            # the first episode of the next season; skip the show when neither does.
            if curr_episode+1 in movie['episodes'][curr_season]:
                next_start_episode = curr_episode + 1
            elif curr_season+1 in movie['episodes']:
                next_start_season = curr_season + 1
                next_start_episode = min(movie['episodes'][next_start_season].keys())
            else:
                continue
            try:
                next_episode = imdb.get_movie(movie['episodes'][next_start_season][next_start_episode].movieID)
                # Release dates look like "Country:date"; parse the date part
                # and report the earliest one.
                dates = []
                for release_date in next_episode['release dates']:
                    dates.append(parser.parse(release_date.split(':')[1], default = default))
                res[id] = dict(
                    air_en = min(dates).strftime('%Y-%m-%d'),
                    season = next_start_season,
                    episode = next_start_episode,
                )
            except KeyError as e:
                # e.g. 'release dates' missing for an unaired episode.
                logger.exception("Error during movie info fetch")
    finally:
        # Persist any newly-resolved movie ids even on failure.
        mmap.save()
    return json.dumps(res) + '\n'
def get_bottom_movies(args):
    """Show IMDb's Bottom 100: either the top entry's full summary
    (``args.first``) or a ranked listing of the first ``args.n`` titles."""
    ia = IMDb()
    bottom = ia.get_bottom100_movies()
    if not args.first:
        list_ranking(bottom, n=args.n)
        return
    ia.update(bottom[0])
    print(bottom[0].summary())
def search_do(self, **kw):
    """Run an IMDb title search for kw['movie_title'] and expose the hits
    (plus the search form) to the template."""
    tmpl_context.search_form = search_movie_form
    term = kw['movie_title']
    hits = IMDb().search_movie(term)
    return {'search_term': term, 'results': hits}
def __init__(self, config, http = False):
    """Create an IMDb provider; uses the 'mobile' access system unless *http*
    is truthy, in which case the full 'http' system is used."""
    log.info('Using IMDB provider.')
    self.config = config
    self.p = IMDb('http') if http else IMDb('mobile')
class imdbWrapper(movieBase):
    """Movie provider backed by IMDbPY's 'mobile' access system."""

    def __init__(self, config):
        log.info('Using IMDB provider.')
        self.config = config
        self.p = IMDb('mobile')

    def conf(self, option):
        """Read *option* from the [IMDB] section of the configuration."""
        return self.config.get('IMDB', option)

    def find(self, q, limit = 8, alternative = True):
        ''' Find movie by name '''
        log.info('IMDB - Searching for movie: %s' % q)
        return self.toResults(self.p.search_movie(q), limit)

    def toResults(self, r, limit = 8, one = False):
        """Convert IMDbPY results to feed items: one item when *one*,
        otherwise up to *limit* items from the result list."""
        if one:
            item = self.feedItem()
            item.imdb = 'tt' + r.movieID
            item.name = self.toSaveString(r['title'])
            item.year = r['year']
            return item
        converted = []
        for hit in r[:limit]:
            converted.append(self.toResults(hit, one = True))
        return converted

    def findById(self, id):
        ''' Find movie by TheMovieDB ID '''
        # Not supported by this provider.
        return []

    def findByImdbId(self, id):
        ''' Find movie by IMDB ID '''
        log.info('IMDB - Searching for movie: %s' % str(id))
        record = self.p.get_movie(id.replace('tt', ''))
        return self.toResults(record, one = True)

    def findReleaseDate(self, movie):
        # Not supported by this provider.
        pass
def imdb_get_info(request, format, person_id):
    """AJAX endpoint: fetch a person's IMDb record by numeric id and return
    name/bio/headshot/url/birth date serialized as 'json' ('xml' is a stub).

    Lookups are retried up to 3 times with a 5s socket timeout; any failure
    yields ``{'error': True}``. Non-AJAX requests receive HTTP 400.
    """
    if not request.is_ajax():
        # If you want to prevent non XHR calls
        return HttpResponse(status=400)
    info = {}
    try:
        # Cap how long each IMDb fetch may block (seconds).
        import socket
        socket.setdefaulttimeout(5)
        MAX_RETRY = 3  # max retries on this request
        person = None
        retries = 0
        while person is None and retries < MAX_RETRY:
            retries += 1
            try:
                ia = IMDb('http')
                person = ia.get_person(person_id)
            except (socket.timeout, IOError):
                # Combined: the original logged the same "timeout" for both.
                logger.warn("timeout")
            except IMDbDataAccessError:
                logger.warn("imdb error")
        socket.setdefaulttimeout(None)
        if person is None:
            # BUG FIX: the original fell through to person['name'] with
            # `person` unbound and relied on the bare except to flag it.
            info['error'] = True
        else:
            # Each field is optional on the IMDb side; collect what exists.
            try:
                info['name'] = person['name'].replace(' - IMDb', '')
            except KeyError:
                pass
            try:
                info['bio'] = person['mini biography']
            except KeyError:
                pass
            try:
                info['headshot'] = person['headshot']
            except KeyError:
                pass
            info['imdb_url'] = "http://www.imdb.com/name/nm" + person_id + "/"
            try:
                info['birth_date'] = person.data['birth date']
            except KeyError:
                pass
    except Exception:
        # Narrowed from a bare except; any unexpected failure is reported.
        info['error'] = True
    # BUG FIX: the original left `data`/`mimetype` unbound (NameError) for
    # unknown formats; provide explicit defaults.
    mimetype = 'text/plain'
    data = 'Unsupported format'
    if format == 'xml':
        mimetype = 'application/xml'
        # TODO xml serialize
        data = 'Not implemented'
    if format == 'json':
        mimetype = 'application/javascript'
        data = json.dumps(info)
    return HttpResponse(data, mimetype)
def search_film(self, kw):
    """Look up *kw* on IMDb and print the first result's primary plot summary
    (the text before IMDb's '::' author marker)."""
    # make connection
    ia = IMDb()
    movie_list = ia.search_movie(kw)
    # BUG FIX: the original crashed with IndexError when the search returned
    # no results.
    if not movie_list:
        print("No results for %s" % kw)
        return
    movie = movie_list[0]
    movie_id = ia.get_movie(movie.movieID)
    plot = movie_id.get("plot", [''])[0]
    plot = plot.split('::')[0]
    print(plot)
def movie_finder(movie):
    """Search IMDb for *movie*, echoing each hit and the final list, and
    return all results as a list."""
    ia = IMDb()
    hits = ia.search_movie(movie)
    movie_list = []
    for hit in hits:
        print(hit)
        movie_list.append(hit)
    print(movie_list)
    return movie_list
def fill_info():
    """For every title row with a non-NULL imdb_id, fetch its business info
    from the local IMDb SQL mirror; failures are printed per-id and skipped."""
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()
    ids = cust_conn.fetch_vec('SELECT imdb_id FROM title WHERE NOT imdb_id IS NULL;')
    for imdb_id in ids:  # renamed from 'id', which shadowed the builtin
        try:
            imdb_conn.get_movie_business(imdb_id)
        except Exception:
            # Narrowed from a bare except; best-effort per-id fetch.
            print("err %s \n" % imdb_id)
def search_item(args):
    """Search IMDb for a keyword, movie, person, character or company.

    With ``args.first`` the top hit is expanded and its summary printed;
    otherwise up to *n* results are listed (``args.n``, falling back to
    DEFAULT_RESULT_SIZE).
    """
    connection = IMDb()
    n = args.n if args.n is not None else DEFAULT_RESULT_SIZE
    if args.type == 'keyword':
        items = connection.search_keyword(args.key)
        if args.first:
            items = connection.get_keyword(items[0])
            list_results(items, type_='movie', n=n)
        else:
            print(' # keyword')
            print('=== =======')
            for i, keyword in enumerate(items[:n]):
                print('%(index)3d %(kw)s' % {'index': i + 1, 'kw': keyword})
    else:
        if args.type == 'movie':
            items = connection.search_movie(args.key)
        elif args.type == 'person':
            items = connection.search_person(args.key)
        elif args.type == 'character':
            items = connection.search_character(args.key)
        elif args.type == 'company':
            items = connection.search_company(args.key)
        if args.first:
            connection.update(items[0])
            print(items[0].summary())
        else:
            # BUG FIX: was n=args.n, which bypassed the DEFAULT_RESULT_SIZE
            # fallback computed above and passed None when --n was omitted.
            list_results(items, type_=args.type, n=n)
def searchPeople(self, name, language):
    """Return the first IMDb person matching *name*, fully updated, or None
    after telling the user (localized) that nothing was found."""
    ia = IMDb()
    hits = ia.search_person(name.encode("utf-8"))
    if hits:
        person = hits[0]
        ia.update(person)
        return person
    dontfind = self.res["dont_find_people"][language]
    self.say(dontfind.format(name))
    self.complete_request()
    return None
def search_imdb_name(token):
    """Query IMDb for movies matching *token*, logging each hit's canonical
    title and movie ID; IMDb errors are logged and swallowed."""
    ia = IMDb()
    logging.debug("Querying IMDB for movie name: %s" % token)
    try:
        s_result = ia.search_movie(token)
        logging.debug(s_result)
        for item in s_result:
            # BUG FIX: logging.debug(a, b) treats b as a %-format argument of
            # a; with no placeholder in a, the record failed to format and the
            # movie ID was never logged. Use an explicit format string.
            logging.debug("%s %s", item["long imdb canonical title"], item.movieID)
    except IMDbError as err:  # modernized from py2-only `except IMDbError, err`
        logging.debug(err)
def searchMovie(self, title, language):
    """Return the first IMDb movie matching *title*, fully updated, or None
    after telling the user (localized) that nothing was found."""
    ia = IMDb()
    hits = ia.search_movie(title.encode("utf-8"))
    if hits:
        infos = hits[0]
        ia.update(infos)
        return infos
    dontfind = self.res["dont_find_movie"][language]
    self.say(dontfind.format(title))
    self.complete_request()
    return None
def print_details(mid):
    """Pretty-print core metadata (id, url, title, year, rating) for the IMDb
    movie with id *mid*."""
    movie = IMDb().get_movie(mid)
    details = OrderedDict([
        ('imdb_id', movie.getID()),
        ('imdb_url', 'http://www.imdb.com/title/tt' + mid + '/'),
        ('title', movie['title']),
        ('year', str(movie['year'])),
        ('imdb_rating', str(movie['rating'])),
    ])
    pprint(details)
# Scrape the NOS NorteShopping cinema listing and look each movie up on IMDb.
# (title, rating) pairs are accumulated in movies_ratings_list.
movies_ratings_list = []
# Format/experience tags that must be stripped from listing titles before
# searching IMDb.
stop_words_movies = ['2D', '3D', '4DX', 'ATMOS', 'IMAX', 'VP', 'VO', 'VIP']
cartaz_norteshop_page = requests.get(
    "http://cinemas.nos.pt/cinemas/Pages/norteshopping.aspx")
cartaz_norteshop_soup = BeautifulSoup(cartaz_norteshop_page.text, 'html.parser')
cartaz_norteshop_a_links = cartaz_norteshop_soup.find_all('a', class_='list-item')
for a_link in cartaz_norteshop_a_links:
    a_href = a_link['href']
    # Only links under /Filmes/ are actual movie pages.
    if re.match(r'/Filmes/', a_href):
        # NOTE(review): movies_list is not defined in this chunk — presumably
        # created earlier in the file; confirm.
        movies_list.append(a_link.text)
imdb_obj = IMDb()
for movie in movies_list:
    movie_flag = 0
    for stop_word in stop_words_movies:
        if stop_word in movie:
            movie = movie.replace(stop_word, '').strip()  # Remove words that might throw off search on IMDB website
    # Skip titles whose rating was already collected.
    for movie_rating_tuple in movies_ratings_list:
        if movie in movie_rating_tuple[0]:
            movie_flag = 1
    if movie_flag == 1:  # If the rating of the movie has already been obtained, move on to the next iteration
        continue
    movie_search_results = imdb_obj.search_movie(movie)
    if movie_search_results:
        movie_obj = movie_search_results[0]
from imdb import IMDb import json ia = IMDb() data = {} seriesIds = [ "0060028", "0092455", "5171438", "0106145", "0112178", "0244365", "0069637" ] count = 0 for sId in seriesIds: m = ia.get_movie(sId) seriesTitle = m["title"] data[seriesTitle] = {} data[seriesTitle]["movieID"] = m.movieID data[seriesTitle]["seasons"] = [] ia.update(m, 'episodes') for season in m["episodes"].values(): data[seriesTitle]["seasons"].append([]) for epNumber, ep in season.items(): epDetails = ia.get_movie(ep.movieID) data[seriesTitle]["seasons"][-1].append({ "title": ep["title"], "episodeID": ep.movieID, "plot": epDetails.get("plot", None), "imdbRating": epDetails.get("rating", None),
class FromIMDB(object):
    """
    This plugin enables generating entries based on an entity, an entity being a
    person, character or company. It's based on IMDBpy which is required
    (pip install imdbpy). The basic config required just an IMDB ID of the
    required entity.

    For example:

        from_imdb: ch0001354

    Schema description:
    Other than ID, all other properties are meant to filter the full list that
    the entity generates.

    id: string that relates to a supported entity type. For example:
        'nm0000375'. Required.
    job_types: a string or list with job types from job_types. Default is
        'actor'.
    content_types: A string or list with content types from content_types.
        Default is 'movie'.
    max_entries: The maximum number of entries that can return. This value's
        purpose is basically flood protection against unruly configurations
        that will return too many results. Default is 200.

    Advanced config example:
        dynamic_movie_queue:
            from_imdb:
              id: co0051941
              job_types:
                - actor
                - director
              content_types: tv series
            accept_all: yes
            movie_queue: add
    """
    # All job categories IMDb distinguishes in a person's filmography.
    job_types = ['actor', 'actress', 'director', 'producer', 'writer', 'self',
                 'editor', 'miscellaneous', 'editorial department',
                 'cinematographer', 'visual effects', 'thanks',
                 'music department', 'in development', 'archive footage',
                 'soundtrack']
    # Title kinds the plugin can emit.
    content_types = ['movie', 'tv series', 'tv mini series', 'video game',
                     'video movie', 'tv movie', 'episode']
    # Maps a config content type to the suffix IMDbPY uses on person
    # filmography keys (e.g. 'actor tv').
    content_type_conversion = {
        'movie': 'movie',
        'tv series': 'tv',
        'tv mini series': 'tv',
        'tv movie': 'tv',
        'episode': 'tv',
        'video movie': 'video',
        'video game': 'video game'
    }
    # Same idea, but for the keys used on Character objects.
    character_content_type_conversion = {
        'movie': 'feature',
        'tv series': 'tv',
        'tv mini series': 'tv',
        'tv movie': 'tv',
        'episode': 'tv',
        'video movie': 'video',
        'video game': 'video-game',
    }
    # Job types whose filmography keys carry no content-type suffix.
    jobs_without_content_type = ['actor', 'actress', 'self', 'in development',
                                 'archive footage']

    imdb_pattern = one_or_more({'type': 'string',
                                'pattern': r'(nm|co|ch)\d{7}',
                                'error_pattern': 'Get the id from the url of the person/company you want to use,'
                                                 ' e.g. http://imdb.com/text/<id here>/blah'},
                               unique_items=True)

    schema = {
        'oneOf': [
            imdb_pattern,
            {'type': 'object',
             'properties': {
                 'id': imdb_pattern,
                 'job_types': one_or_more({'type': 'string', 'enum': job_types}, unique_items=True),
                 'content_types': one_or_more({'type': 'string', 'enum': content_types}, unique_items=True),
                 'max_entries': {'type': 'integer'},
                 'match_type': {'type': 'string', 'enum': ['strict', 'loose']}
             },
             'required': ['id'],
             'additionalProperties': False
             }
        ],
    }

    def prepare_config(self, config):
        """
        Converts config to dict form and sets defaults if needed
        """
        config = config
        if isinstance(config, basestring):
            config = {'id': [config]}
        elif isinstance(config, list):
            config = {'id': config}
        if isinstance(config, dict) and not isinstance(config['id'], list):
            config['id'] = [config['id']]
        config.setdefault('content_types', [self.content_types[0]])
        config.setdefault('job_types', [self.job_types[0]])
        config.setdefault('max_entries', 200)
        config.setdefault('match_type', 'strict')
        if isinstance(config.get('content_types'), str_types):
            log.debug('Converted content type from string to list.')
            config['content_types'] = [config['content_types']]
        if isinstance(config['job_types'], str_types):
            log.debug('Converted job type from string to list.')
            config['job_types'] = [config['job_types']]
        # Special case in case user meant to add actress instead of actor (different job types in IMDB)
        if 'actor' in config['job_types'] and 'actress' not in config['job_types']:
            config['job_types'].append('actress')
        return config

    def get_items(self, config):
        """Collect items across every configured entity id; a failure on one
        id is logged and does not abort the rest."""
        items = []
        for id in config['id']:
            try:
                entity_type, entity_object = self.get_entity_type_and_object(id)
            except Exception as e:
                log.error('Could not resolve entity via ID: {}. '
                          'Either error in config or unsupported entity. Error:{}'.format(id, e))
                continue
            items += self.get_items_by_entity(entity_type, entity_object,
                                              config.get('content_types'),
                                              config.get('job_types'),
                                              config.get('match_type'))
        # Deduplicate across entities.
        return set(items)

    def get_entity_type_and_object(self, imdb_id):
        """
        Return a tuple of entity type and entity object
        :param imdb_id: string which contains IMDB id
        :return: entity type, entity object (person, company, etc.)
        """
        if imdb_id.startswith('nm'):
            person = self.ia.get_person(imdb_id[2:])
            log.info('Starting to retrieve items for person: %s' % person)
            return 'Person', person
        elif imdb_id.startswith('co'):
            company = self.ia.get_company(imdb_id[2:])
            log.info('Starting to retrieve items for company: %s' % company)
            return 'Company', company
        elif imdb_id.startswith('ch'):
            character = self.ia.get_character(imdb_id[2:])
            log.info('Starting to retrieve items for Character: %s' % character)
            return 'Character', character
        # NOTE(review): an unrecognized prefix falls through and returns None,
        # which makes the caller's tuple-unpacking raise — caught there.

    def get_items_by_entity(self, entity_type, entity_object, content_types, job_types, match_type):
        """
        Gets entity object and return movie list using relevant method
        """
        if entity_type == 'Company':
            return self.items_by_company(entity_object)
        if entity_type == 'Character':
            return self.items_by_character(entity_object, content_types, match_type)
        elif entity_type == 'Person':
            return self.items_by_person(entity_object, job_types, content_types, match_type)

    def flatten_list(self, _list):
        """
        Gets a list of lists and returns a flat list
        """
        for el in _list:
            if isinstance(el, collections.Iterable) and not isinstance(el, basestring):
                for sub in self.flatten_list(el):
                    yield sub
            else:
                yield el

    def flat_list(self, non_flat_list, remove_none=False):
        # Optionally drop falsy entries after flattening.
        flat_list = self.flatten_list(non_flat_list)
        if remove_none:
            flat_list = [_f for _f in flat_list if _f]
        return flat_list

    def filtered_items(self, unfiltered_items, content_types, match_type):
        """Keep only items whose IMDb 'kind' is in *content_types* when
        match_type is 'strict'; pass everything through when 'loose'."""
        items = []
        unfiltered_items = set(unfiltered_items)
        for item in sorted(unfiltered_items):
            if match_type == 'strict':
                log.debug('Match type is strict, verifying item type to requested content types')
                # Full update needed because search results lack 'kind'.
                self.ia.update(item)
                if item['kind'] in content_types:
                    log.verbose('Adding item "{}" to list. Item kind is "{}"'.format(item, item['kind']))
                    items.append(item)
                else:
                    log.verbose('Rejecting item "{}". Item kind is "{}'.format(item, item['kind']))
            else:
                log.debug('Match type is loose, all items are being added')
                items.append(item)
        return items

    def items_by_person(self, person, job_types, content_types, match_type):
        """
        Return item list for a person object
        """
        unfiltered_items = self.flat_list(
            [self.items_by_job_type(person, job_type, content_types) for job_type in job_types],
            remove_none=True)
        return self.filtered_items(unfiltered_items, content_types, match_type)

    def items_by_content_type(self, person, job_type, content_type):
        # Filmography list under e.g. 'actor movie' / 'director tv', falsy
        # entries removed.
        return [_f for _f in
                (person.get(job_type + ' ' + self.content_type_conversion[content_type], []))
                if _f]

    def items_by_job_type(self, person, job_type, content_types):
        # NOTE(review): the 'and' chaining below returns only the last
        # non-empty list (documentary/short lists are discarded when all are
        # non-empty) — looks like '+' may have been intended; confirm upstream.
        items = person.get(job_type, []) if job_type in self.jobs_without_content_type else [
            person.get(job_type + ' ' + 'documentary', []) and
            person.get(job_type + ' ' + 'short', []) and
            self.items_by_content_type(person, job_type, content_type)
            if content_type == 'movie' else
            self.items_by_content_type(person, job_type, content_type)
            for content_type in content_types
        ]
        return [_f for _f in items if _f]

    def items_by_character(self, character, content_types, match_type):
        """
        Return items list for a character object
        :param character: character object
        :param content_types: content types as defined in config
        :return:
        """
        unfiltered_items = self.flat_list(
            [character.get(self.character_content_type_conversion[content_type])
             for content_type in content_types],
            remove_none=True)
        return self.filtered_items(unfiltered_items, content_types, match_type)

    def items_by_company(self, company):
        """
        Return items list for a company object
        :param company: company object
        :return: company items list
        """
        return company.get('production companies')

    @cached('from_imdb', persist='2 hours')
    def on_task_input(self, task, config):
        """Task entry point: resolve the configured entities, build Entry
        objects for their titles, and return them (unless the max_entries
        flood-protection limit is exceeded)."""
        try:
            from imdb import IMDb
            self.ia = IMDb()
        except ImportError:
            log.error('IMDBPY is required for this plugin. Please install using "pip install imdbpy"')
            return
        entries = []
        config = self.prepare_config(config)
        items = self.get_items(config)
        if not items:
            log.error('Could not get IMDB item list, check your configuration.')
            return
        for item in items:
            entry = Entry(title=item['title'],
                          imdb_id='tt' + self.ia.get_imdbID(item),
                          url='',
                          imdb_url=self.ia.get_imdbURL(item))
            if entry.isvalid():
                if entry not in entries:
                    entries.append(entry)
                    if entry and task.options.test:
                        log.info("Test mode. Entry includes:")
                        for key, value in list(entry.items()):
                            log.info(' {}: {}'.format(key.capitalize(), value))
            else:
                log.error('Invalid entry created? %s' % entry)
        if len(entries) <= config.get('max_entries'):
            return entries
        else:
            log.warning(
                'Number of entries (%s) exceeds maximum allowed value %s. '
                'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
                    len(entries), config.get('max_entries')))
        return
def getEps(title):
    """Look up a TV series on IMDb and report per-episode air-date status.

    Returns either an error string, or a (titles, descriptions, dates) tuple
    of parallel lists.  Python 2 era code (dict.has_key).
    """
    #strip title input by user
    title = title.strip()
    # Create IMDb object
    i = IMDb()
    #search for title
    search_results = i.search_movie(title)
    # Get first search result that is a TV series
    search_results = filter(lambda s: s['kind'] == 'tv series', search_results)
    search_results = list(islice(search_results, 0, 1))
    #if no result found
    if len(search_results) == 0:
        return 'No TV series matches were found for "%s".' % title
    s = search_results[0]
    i.update(s, 'episodes')
    s_title = s['long imdb title']
    #if no episode info found
    if (not s.has_key('episodes')) or len(s['episodes']) == 0:
        return 'Episode info is unavailable for %s.' % s_title
    s = sortedEpisodes(s)
    if len(s) == 0:
        return 'Episode info is unavailable for %s.' % s_title
    s.reverse()
    date_today = date.today()
    e = []
    for ep_ind in range(0, len(s)):
        if s[ep_ind].has_key('original air date'):
            # Year-only dates ("1999") vs full dates ("1 Jan 1999").
            if (len(s[ep_ind]['original air date'])) == 4:
                s[ep_ind]['date'] = strptime(s[ep_ind]['original air date'], '%Y')
            else:
                s[ep_ind]['date'] = strptime(
                    s[ep_ind]['original air date'].replace('.', ''), '%d %b %Y')
        if s[ep_ind].has_key('date'):
            s[ep_ind]['date'] = date(*s[ep_ind]['date'][0:3])
            # Positive age means the episode already aired.
            s[ep_ind]['age'] = (date_today - s[ep_ind]['date']).days
            if s[ep_ind]['age'] > 0:
                s[ep_ind]['has aired'] = True
            else:
                s[ep_ind]['has aired'] = False
            e.append(s[ep_ind])
        else:
            s[ep_ind]['has aired'] = False
            e.append(s[ep_ind])

    #function to get season episode format for description
    def getSE(e):
        if not isinstance(e['season'], int):
            return ''
        Sstr = 'S' + str(e['season']).zfill(2)
        Estr = 'E' + str(e['episode']).zfill(2)
        return ' (' + Sstr + Estr + ')'

    #function to get age of episode (negative if has not aired, positive if has aired)
    def getAge(e):
        return locale.format('%i', abs(e['age']), grouping=True)

    #function to get date of episode
    def getDate(e):
        return e['date'].strftime('%a, ') + str(
            e['date'].day) + e['date'].strftime(' %b %y')

    titles = []
    descriptions = []
    dates = []
    # NOTE(review): `i` below shadows the IMDb instance created above, and `e`
    # (the episode list) is rebound to a single episode inside its own loop --
    # fragile; confirm intent before refactoring.
    for i in e:
        e_out = ''
        if len(e) > 0:
            e = i
            titles.append(s_title)
            descriptions.append(getSE(e))
            e_out = e_out + 'The next upcoming episode ' + 'for ' + s_title + ' ' + 'is "' + e[
                'title'] + '"' + getSE(e) + '.'
            if e.has_key('age'):
                e_schedule = 'in %s days' % getAge(e)
                e_out = e_out + ' It airs ' + e_schedule + ', ' + getDate(
                    e) + '.'
                dates.append(getDate(e))
            else:
                e_out = e_out + ' Its air date is unavailable.'
                dates.append('unknown')
            print(e_out)
    return titles, descriptions, dates
# Create Series Class for SQL Database Table class Series(Base): __tablename__ = 'series' id = Column(Integer, primary_key=True) tconst = Column(Integer) title = Column(String(255)) num_seasons = Column(Integer) avg_rating = Column(Float) num_votes = Column(Integer) # Create Episode Class for SQL Database Table Base.metadata.create_all(conn) ia = IMDb() # Show Listing rick_n_morty = '2861424' ren_n_stimpy = '0101178' beevis_n_butthead = '0105950' aeon_flux = '0111873' # in case of re search copy Æon Flux celeb_deathmatch = '0208614' daria = '0118298' south_park = '0121955' fam_guy = '0182576' american_dad = '0397306' king_hill = '0118375' space_ghost = '0108937' futurama = '0149460' aqua_thf = '0297494'
import json, urllib.request, sqlite3, time
from imdb import IMDb
from flask import Flask, flash, redirect, render_template, request, session
from flask_session import Session
from datetime import datetime
from flask_paginate import Pagination, get_page_args

app = Flask(__name__)
imdb = IMDb()

# Ensure templates are auto-reloaded
app.config["TEMPLATES_AUTO_RELOAD"] = True


# Main function to update and jsonify once a day
# NOTE(review): this sleeps once and returns -- presumably meant to be called
# in a loop or by a scheduler; confirm against the caller.
def main():
    update()
    jsonify()
    time.sleep(86400)


# Parsing function: scrapes Rotten Tomatoes' private browse API into movies.db.
# (Definition continues beyond this view -- truncated here.)
def update():
    SQL = sqlite3.connect('movies.db')
    database = SQL.cursor()
    print("Starting the list update...\n")
    page_counter = 1
    url = "https://www.rottentomatoes.com/api/private/v2.0/browse?" \
          "maxTomato=100&maxPopcorn=100&certified&sortBy=release&type=cf-dvd-streaming-all&page="
    movies = []
    count = 32
def getEps(title, max_len=990, debug=False):  # 1024-32-2 = 990
    """Returns a text string containing schedule info for the last aired and
    the next upcoming episodes for the given TV series title"""
    # Validate title
    assert isinstance(title, str), 'A string input was not provided.'
    # Preprocess title
    title = title.strip()
    # Determine if the next upcoming episode's plot should be included if available (Default is True)
    if title.lower().endswith('/noplot'):
        title = title[:-len('/noplot')].rstrip()
        include_plot = False
    else:
        include_plot = True
    try:
        # Validate title further
        if len(title) == 0:
            return 'A title was not provided.'
        # Create IMDb object
        i = IMDb()
        # Get search results, retrying with linear backoff on transient errors
        max_attempts = 3  # Set to anything greater than 1
        for attempt in range(1, max_attempts + 1):
            try:
                search_results = i.search_movie(title)
                break
            except:
                if attempt < max_attempts:
                    if debug:
                        print(
                            'An error occurred while attempting to retrieve search results for "%s". %s attempts were made.'
                            % (title, attempt) + '\n')
                    sleep(attempt * 2)
                else:
                    return 'An error occurred while attempting to retrieve search results for "%s". %s attempts were made.' % (
                        title, attempt)
        del attempt, max_attempts
        # Get first search result that is a TV series
        search_results = filter(lambda s: s['kind'] == 'tv series', search_results)
        search_results = list(islice(search_results, 0, 1))
        if len(search_results) == 0:
            return 'No TV series matches were found for "%s".' % title
        s = search_results[0]
        del search_results
        # Get episodes
        i.update(s, 'episodes')
        s_title = s['long imdb title']
        if ('episodes' not in s) or len(s['episodes']) == 0:
            return 'Episode info is unavailable for %s.' % s_title
        s = sortedEpisodes(s)
        if len(s) == 0:
            return 'Episode info is unavailable for %s.' % s_title
        # Sort episodes in approximately the desired order
        s.reverse()  # This results in episodes that are sorted in the desired order. If, however, the episodes are not listed in proper order at the source, such as for "Showtime Championship Boxing" (1987) as of 2/29/08, the error will be copied here.
        s = list(dropwhile(lambda e: e['season'] == 'unknown', s)) + list(
            takewhile(lambda e: e['season'] == 'unknown', s)
        )  # While this may not always produce the most accurate results, it prevents episodes belonging to an unknown season from being thought of as most recent.
        # Process date related info for episodes
        date_today = date.today()
        for ep_ind in range(len(s)):
            if 'original air date' in s[ep_ind]:
                try:
                    s[ep_ind]['date'] = strptime(
                        s[ep_ind]['original air date'], '%d %B %Y')
                except:
                    pass
            if 'date' in s[ep_ind]:
                s[ep_ind]['date'] = date(*s[ep_ind]['date'][0:3])
                s[ep_ind]['age'] = (
                    s[ep_ind]['date'] - date_today).days  # Age is date delta in days
                # Negative age => the episode's date is in the past (aired).
                if s[ep_ind]['age'] < 0:
                    s[ep_ind]['has aired'] = True
                else:
                    s[ep_ind]['has aired'] = False
            else:
                s[ep_ind]['has aired'] = False
        del date_today, ep_ind
        # Print last 10 listed episodes (if debugging)
        if debug:
            print('Last 10 listed episodes:\nS# Epi# Age Episode Title')
            for e in s[:10]:
                print(
                    '%s %s %s %s' %
                    (str(e['season']).zfill(2)[:2], str(e['episode']).zfill(4),
                     'age' in e and str(e['age']).zfill(5) or ' ' * 5,
                     e['title'].encode('latin-1')))
            print()

        # Declare convenient functions for use in generating output string
        def getSE(e):
            # "(SxxEyy)" tag; empty when the season is not numeric.
            if not isinstance(e['season'], int):
                return ''
            Sstr = 'S' + str(e['season']).zfill(2)
            Estr = 'E' + str(e['episode']).zfill(2)
            return ' (' + Sstr + Estr + ')'

        def getAge(e):
            return locale.format('%i', abs(e['age']), grouping=True)

        def getDate(e):
            return 'i.e. on ' + e['date'].strftime('%a, ') + str(
                e['date'].day) + e['date'].strftime(' %b %y')

        # Determine last aired episode
        # (An episode that airs today is considered to be not yet aired)
        e = filter(lambda e: e['has aired'], s)
        e = list(islice(e, 0, 1))
        if len(e) > 0:
            e = e[0]
            e_schedule = e['age'] != -1 and ('%s days ago' % getAge(e)) or 'yesterday'
            # Generate output string when last aired episode is available
            e_out = 'The episode that aired last for ' + s_title + ' is "' + e[
                'title'] + '"' + getSE(
                    e) + '. It aired ' + e_schedule + ', ' + getDate(e) + '. '
            del e_schedule
        else:
            # Generate output string when last aired episode is unavailable
            e_out = ''
        # Determine next upcoming episode
        # (An episode that airs today is considered to be an upcoming episode)
        e = list(takewhile(lambda e: e['has aired'] == False, s))  # Memory inefficient
        if len(e) > 0:
            e = e[-1]
            # Generate output string when next upcoming episode is available
            e_out = e_out + 'The next upcoming episode ' + (
                e_out == '' and ('for ' + s_title + ' ') or
                '') + 'is "' + e['title'] + '"' + getSE(e) + '.'
            if 'age' in e:
                e_schedule = e['age'] > 1 and (
                    'in %s days' % getAge(e)
                ) or e['age'] == 1 and 'tomorrow' or e['age'] == 0 and 'today'
                e_out = e_out + ' It airs ' + e_schedule + ', ' + getDate(
                    e) + '.'
                del e_schedule
            else:
                e_out = e_out + ' Its air date is unavailable.'
            if include_plot:
                if 'plot' in e and e['plot'] != 'Related Links':
                    e_out = e_out + ' Its plot is: ' + e['plot']
                elif e_out.endswith('Its air date is unavailable.'):
                    e_out = e_out.replace(
                        'Its air date is unavailable.',
                        'Its air date and plot are unavailable.')
                else:
                    e_out = e_out + ' Its plot is unavailable.'
        else:
            if e_out != '':  # Last: available; Next: unavailable
                e_out = e_out + 'No upcoming episode is scheduled.'
            else:  # Last: unavailable; Next: unavailable
                e_out = 'Episode info is unavailable for %s.' % s_title
        # Conditionally trim output string
        if (max_len not in [-1, 0, None]) and len(e_out) > max_len - 3:
            e_out = e_out[:max_len - 3] + '...'
        # Return output string
        return e_out
    except:
        return 'An error occurred while attempting to retrieve episode info for "%s".' % title
#PLEASE BE AWARE CODE IS A DUMMY AS OF THIS AND CAN ONLY TAKE 4 GENRES AS A USER INPUT WHILE FILTERING from imdb import IMDb import pandas as pd import numpy as np ia = IMDb() top250Movies = ia.get_top250_movies() #get top 20 Movies this way which returns lot of details including genres top250Movies = [ia.get_movie(movie.movieID) for movie in top250Movies[:250]] dataset = [movie['title'] for movie in top250Movies] """ Full list of genre types on IMDB Action Adventure Animation Biography Comedy Crime Drama Family Fantasy Film-Noir History Horror Music Musical Mystery Romance Sci-Fi
#!python2
# NOTE: Python 2 script (capitalised Tkinter, raw_input in the commented block).
from imdb import IMDb
from Tkinter import *
import ttk

i = IMDb()


# Return a list of credits for an actor
def search(actor):
    """Return the filmography list for the first IMDb hit for *actor*.

    IMDbPy stores the filmography under the 'actor' key for men and
    'actress' for women, so fall back to the latter when the former is
    missing.

    Fixes vs original: the person record is fetched once instead of twice
    (the except branch repeated the network call), and the bare `except:`
    is narrowed to KeyError -- the only failure expected here.
    """
    name = i.search_person(actor)[0]
    actor_id = name.personID
    person = i.get_person(actor_id)
    try:
        return person['actor']
    except KeyError:
        return person['actress']


'''
print('\nEnter the names of two actors')
print("We'll see what they've been in together")

actor_a = raw_input('\nActor 1: ')
actor_b = raw_input('\nActor 2: ')
'''


def match(*args):
    disp_match.configure(text='Fetching filmographies')
from imdb import IMDb # create an instance of the IMDb class ia = IMDb() # get a movie and print its director(s) the_matrix = ia.search_movie('Stargate SG1') for movie in the_matrix: print(movie["title"]) print(movie.movieID) mv = ia.get_movie(movie.movieID) print(mv) ia.update(mv, 'episodes') episode = mv['episodes'][4][3] #print(episode) print(episode["plot"]) print(episode["title"]) #print(episode["synopsis"]) for key in mv.current_info: try: print(mv[key]) except Exception as e: print("except" + str(e)) pass print() """ for director in the_matrix['directors']: print(director['name']) # show all information that are currently available for a movie print(sorted(the_matrix.keys()))
# (fragment: tail of a loop reading IMDb ids into animation_movies; `line`,
#  `i` and `tsv_file` are defined outside this view.  line[0][2:] presumably
#  strips a "tt" prefix -- confirm against the TSV format.)
animation_movies.append(line[0][2:])
print(i)
i = i+1
tsv_file.close()

# create the final DB
workbook = xlsxwriter.Workbook('USA_DB.xlsx')
worksheet = workbook.add_worksheet()
worksheet.write('A1', 'Movie_ID')
worksheet.write('B1', 'Title')
worksheet.write('C1', 'Year')
row = 1

# connecting to IMDB
ia = IMDb()
# NOTE(review): j is printed every iteration but never incremented.
j = 0
for movie_id in animation_movies:
    movie = ia.get_movie(movie_id)
    country = movie.get('country')
    if country == None:
        continue
    # Keep only titles whose first listed country is the United States.
    if country[0] == "United States":
        content = [movie_id, movie.get('title'), movie.get('year')]
        column = 0
        for item in content:
            worksheet.write(row, column, item)
            column += 1
        row += 1
    print(j)
# Shell setup commands -- run these in a terminal BEFORE executing this script.
# They are not Python; left bare (as in the original) they are SyntaxErrors:
#   pip install imdbpy
#   pip install bs4
#   pip install pandas
#   pip install requests

from imdb import IMDb
from bs4 import BeautifulSoup
from requests import get
import pandas as pd
import collections
import re

ia = IMDb()

# Parallel result columns -- appended to in lock-step while scraping, one
# entry per movie.
Movie_Names = []
Movie_actor_1_name = []
Movie_actor_2_name = []
Movie_actor_3_name = []
Movie_director = []
Movie_writter_names = []
Movie_music_names = []
Movie_cinematography_name = []
Movie_costume_name = []
Movie_years = []
Movie_ratings = []
Movie_awords_win = []
Movie_awords_nominations = []
Movie_awords_oscars = []
from imdb import IMDb

ia = IMDb()

# Look up "The Matrix" directly by its IMDb id (tt0133093).
movie = ia.get_movie('0133093')

# print(movie['director'])

# Print the first listed runtime, then the release year.
for value in (movie['runtimes'][0], movie['year']):
    print(value)

# Info sets that can be fetched for Movie objects:
# print(ia.get_movie_infoset())
class PlexHolidays():
    """Build a Plex playlist of media whose IMDb plot keywords contain a
    user-supplied keyword (e.g. a holiday name)."""

    def __init__(self):
        self.plex = Plex()
        self.imdb = IMDb()
        keyword = input('Keyword (i.e. Holiday name): ')
        keyword_matches = []

        print('Scanning', self.plex.section.title, '...')
        for plex_medium in tqdm(self.plex.media):
            imdb_medium = self.plex2imdb(plex_medium)
            if not imdb_medium:
                continue
            keywords = self.get_keywords(imdb_medium)
            if keyword.lower() in keywords:
                keyword_matches.append(plex_medium)

        if keyword_matches:
            print('Titles matching\"', keyword, '\" :')
            for match in keyword_matches:
                print('\t', match.title)
            self.plex.create_playlist(input('Playlist name: '), keyword_matches)
        else:
            print('No matches found. D:')
        print('Happy Holidays!')

    def plex2imdb(self, medium):
        """ Get the IMDbPy object for a given Plex object.

        Prefers an exact year match; otherwise returns the result whose year
        is closest to the Plex year.  Returns None when IMDb has nothing.
        """
        # Set appropriate search method and acceptable results based on section type
        if self.plex.section.type == 'movie':
            kinds = {'movie', 'short', 'tv movie', 'tv short'}
            search_function = self.imdb.search_movie
        else:
            kinds = {'episode'}
            search_function = self.imdb.search_episode
        # Perform IMDb search for the Plex object (retry on timeouts)
        while True:
            try:
                results = [_ for _ in search_function(medium.title)
                           if _['kind'] in kinds]
                break
            # Time out, try again.
            except OSError:
                print('Timed out while downloading', medium.title)
                continue
        # No IMDb results whatsoever
        if not results:
            return None
        # Plex has no year listed, return first search result
        elif not medium.year:
            return results[0]
        closest_result, closest_year = None, 9999
        for result in results:
            # This result has no year listed, ignore it.
            if not result.get('year'):
                continue
            # Exact match found
            if result['year'] == medium.year:
                return result
            # BUG FIX: track the smallest absolute year difference.  The
            # original never updated closest_year (and ignored the sign), so
            # it effectively kept the *last* dated result, not the closest.
            elif abs(medium.year - result['year']) < closest_year:
                closest_year = abs(medium.year - result['year'])
                closest_result = result
        # No exact match found, use result with closest year
        return closest_result

    def get_keywords(self, imdb_obj):
        """ Get the plot keywords for a given IMDbPy object. """
        if not imdb_obj:
            return []
        data = self.imdb.get_movie_keywords(imdb_obj.movieID)['data']
        if 'keywords' not in data:
            return []
        return data['keywords']
from django.forms import model_to_dict
from django.template.defaultfilters import urlencode
from django.utils.datetime_safe import datetime
from django.utils.timezone import make_aware
from imdb import IMDb
from wikipedia import wikipedia, re

from OpenAlumni.Bot import Bot
from OpenAlumni.Tools import log, translate, load_page, in_dict, load_json, remove_html, fusion, remove_ponctuation, \
    equal_str, now, remove_accents, index_string
from OpenAlumni.settings import MOVIE_NATURE
from alumni.models import Profil, Work, PieceOfWork, Award, Festival

#from scipy import pdist

ia = IMDb()


def extract_movie_from_cnca(title: str):
    """Stub: intended to query the CNC-RCA registry for *title*.

    Currently a no-op that builds the request payload and returns the title
    unchanged -- the HTTP call is commented out below.
    """
    #title=title.replace(" ","+")
    obj = {"RechercheOeuvre_1": {"Tbx_Titre": title}}
    #page=wikipedia.BeautifulSoup(wikipedia.requests.post("http://www.cnc-rca.fr/Pages/Page.aspx?view=RecOeuvre",),headers={'User-Agent': 'Mozilla/5.0'}).text, "html5lib")
    return title


def extract_movie_from_bdfci(pow: PieceOfWork, refresh_delay=31):
    """Search bdfci.info for pow.title.

    (Definition continues beyond this view -- truncated here.)
    """
    title = pow.title.replace(" ", "+")
    page = load_page("https://www.bdfci.info/?q=" + title + "&pa=f&d=f&page=search&src=bdfci&startFrom=1&offset=1",
                     refresh_delay=refresh_delay)
    articles = page.find_all("article")
from flask import Flask, render_template, request
from imdb import IMDb

app = Flask(__name__)
instance = IMDb()


@app.route("/")
def home():
    """Render the search form."""
    return render_template("home.html")


@app.route("/movies", methods=["GET", "POST"])
def movies():
    """Handle a movie-title search.

    POST: run an IMDb title search, fetch the full record for every hit, and
    render the results page.  GET: show the search form again.
    """
    if request.method == "POST":
        search = request.form.get("name")
        movie = instance.search_movie(str(search))
        # Fetch the full Movie record for each search hit.
        # (Idiom fix: iterate results directly instead of range(len(...));
        # behavior is unchanged -- all hits are still fetched.)
        movie_three = [instance.get_movie(result.movieID) for result in movie]
        return render_template("movies.html",
                               movie=movie,
                               movie_three=movie_three)
    else:
        return render_template("home.html")
def getInfos(self, item, export):
    """Build the Kodi infoLabels dict for one Amazon catalogue *item*.

    item: decoded catalogue entry (movie / series / season / episode).
    export: when False, fill in default artwork and colourise non-Prime
    titles for on-screen display.
    Returns (contentType, infoLabels).
    """
    infoLabels = self.getAsins(item)
    infoLabels['DisplayTitle'] = infoLabels['Title'] = self.cleanTitle(
        item['title'])
    infoLabels['contentType'] = contentType = item['contentType'].lower()
    infoLabels['mediatype'] = 'movie'
    infoLabels['Plot'] = item.get('synopsis')
    infoLabels['Director'] = item.get('director')
    infoLabels['Studio'] = item.get('studioOrNetwork')
    infoLabels['Cast'] = item.get('starringCast', '').split(',')
    # Runtime arrives in milliseconds; Kodi wants seconds.
    infoLabels['Duration'] = str(item['runtime']['valueMillis'] / 1000) if 'runtime' in item else None
    infoLabels['TrailerAvailable'] = item.get('trailerAvailable', False)
    infoLabels['Fanart'] = item.get('heroUrl')
    infoLabels['isAdult'] = 1 if 'ageVerificationRequired' in str(
        item.get('restrictions')) else 0
    infoLabels['Genre'] = ' / '.join(item.get('genres', '')).replace(
        '_', ' & ').replace('Musikfilm & Tanz', 'Musikfilm, Tanz')
    # Derive a full-size thumb from the first format image, best effort.
    if 'formats' in item and 'images' in item['formats'][0].keys():
        try:
            thumbnailUrl = item['formats'][0]['images'][0]['uri']
            thumbnailFilename = thumbnailUrl.split('/')[-1]
            thumbnailBase = thumbnailUrl.replace(thumbnailFilename, '')
            infoLabels['Thumb'] = thumbnailBase + thumbnailFilename.split(
                '.')[0] + '.jpg'
        except:
            pass
    if 'releaseOrFirstAiringDate' in item:
        infoLabels['Premiered'] = item['releaseOrFirstAiringDate'][
            'valueFormatted'].split('T')[0]
        infoLabels['Year'] = int(infoLabels['Premiered'].split('-')[0])
    if 'regulatoryRating' in item:
        if item['regulatoryRating'] == 'not_checked' or not item[
                'regulatoryRating']:
            infoLabels['MPAA'] = getString(30171)
        else:
            infoLabels['MPAA'] = AgeRestrictions().GetAgeRating(
            ) + item['regulatoryRating']
    # Ratings are on a 5-star scale; Kodi uses 10, hence the doubling.
    if 'customerReviewCollection' in item:
        infoLabels['Rating'] = float(
            item['customerReviewCollection']['customerReviewSummary']
            ['averageOverallRating']) * 2
        infoLabels['Votes'] = str(
            item['customerReviewCollection']['customerReviewSummary']
            ['totalReviewCount'])
    elif 'amazonRating' in item:
        infoLabels['Rating'] = float(
            item['amazonRating']
            ['rating']) * 2 if 'rating' in item['amazonRating'] else None
        infoLabels['Votes'] = str(
            item['amazonRating']
            ['count']) if 'count' in item['amazonRating'] else None
        # Fall back to the IMDb rating when only the Amazon rating exists;
        # NOTE(review): this does a live IMDb lookup per item -- slow.
        stars = infoLabels['Rating']
        ia = IMDb()
        movs = ia.search_movie(infoLabels['Title'])
        if len(movs) > 0:
            ia.update(movs[0])
            infoLabels['Rating'] = movs[0].get('rating')
        else:
            if stars >= 8:
                infoLabels['Rating'] = 1.1
            else:
                infoLabels['Rating'] = 1
    if contentType == 'series':
        infoLabels['mediatype'] = 'tvshow'
        infoLabels['TVShowTitle'] = item['title']
        infoLabels['TotalSeasons'] = item['childTitles'][0][
            'size'] if 'childTitles' in item else None
    elif contentType == 'season':
        infoLabels['mediatype'] = 'season'
        infoLabels['Season'] = item['number']
        # Resolve the parent series from ancestorTitles when present.
        if item['ancestorTitles']:
            try:
                infoLabels['TVShowTitle'] = item['ancestorTitles'][0][
                    'title']
                infoLabels['SeriesAsin'] = item['ancestorTitles'][0][
                    'titleId']
            except:
                pass
        else:
            infoLabels['SeriesAsin'] = infoLabels['Asins'].split(',')[0]
            infoLabels['TVShowTitle'] = item['title']
        if 'childTitles' in item:
            infoLabels['TotalSeasons'] = 1
            infoLabels['Episode'] = item['childTitles'][0]['size']
    elif contentType == 'episode':
        infoLabels['mediatype'] = 'episode'
        if item['ancestorTitles']:
            # Walk the ancestors to find the owning series and season.
            for content in item['ancestorTitles']:
                if content['contentType'] == 'SERIES':
                    infoLabels['SeriesAsin'] = content[
                        'titleId'] if 'titleId' in content else None
                    infoLabels['TVShowTitle'] = content[
                        'title'] if 'title' in content else None
                elif content['contentType'] == 'SEASON':
                    infoLabels['Season'] = content[
                        'number'] if 'number' in content else None
                    infoLabels['SeasonAsin'] = content[
                        'titleId'] if 'titleId' in content else None
                    seasontitle = content[
                        'title'] if 'title' in content else None
            # No SERIES ancestor: fall back to the season's identifiers.
            if 'SeriesAsin' not in infoLabels.keys(
            ) and 'SeasonAsin' in infoLabels.keys():
                infoLabels['SeriesAsin'] = infoLabels['SeasonAsin']
                infoLabels['TVShowTitle'] = seasontitle
        else:
            infoLabels['SeriesAsin'] = ''
        if 'number' in item.keys():
            infoLabels['Episode'] = item['number']
            if item['number'] > 0:
                infoLabels['DisplayTitle'] = '%s - %s' % (
                    item['number'], infoLabels['Title'])
            else:
                if ':' in infoLabels['Title']:
                    infoLabels['DisplayTitle'] = infoLabels['Title'].split(
                        ':')[1].strip()
    if 'TVShowTitle' in infoLabels:
        infoLabels['TVShowTitle'] = self.cleanTitle(
            infoLabels['TVShowTitle'])
    infoLabels = self.getArtWork(infoLabels, contentType)
    if not export:
        # Display-only fallbacks and Prime colourisation.
        if not infoLabels['Thumb']:
            infoLabels['Thumb'] = self._s.DefaultFanart
        if not infoLabels['Fanart']:
            infoLabels['Fanart'] = self._s.DefaultFanart
        if not infoLabels['isPrime'] and not contentType == 'series':
            infoLabels['DisplayTitle'] = '[COLOR %s]%s[/COLOR]' % (
                self._g.PayCol, infoLabels['DisplayTitle'])
    return contentType, infoLabels
class IMDBAPI:
    """In-memory catalogue of top IMDb films, cached on disk in film.pkl."""

    def __init__(self) -> None:
        # self.films holds the cached Movie objects; loaded from pickle.
        self.films = list()
        self.app = IMDb()
        # self.save_file()
        self.load_file()

    def initialise(self):
        """Reload the film cache from disk."""
        print("Start fetching")
        self.films = list()
        self.load_file()

    def save_file(self):
        """Fetch the top-250 list and pickle the first full records.

        NOTE(review): `if i > 100: continue` keeps indices 0..100, i.e. 101
        films -- confirm whether exactly 100 was intended.
        """
        top250 = self.app.get_top250_movies()
        for i in range(len(top250)):
            if i > 100:
                continue
            self.films.append(self.app.get_movie(top250[i].movieID))
        with open('film.pkl', 'wb') as output:
            pickle.dump(self.films, output, pickle.HIGHEST_PROTOCOL)

    def load_file(self):
        """Populate self.films from the pickle cache.

        NOTE(review): `input` shadows the builtin of the same name.
        """
        with open('film.pkl', 'rb') as input:
            self.films = pickle.load(input)

    def search_for_years(self, start_year, end_year):
        """Films whose year falls within [start_year, end_year], inclusive."""
        result = list()
        for i in self.films:
            if is_null(i):
                continue
            if start_year <= get_year(i) <= end_year:
                result.append(i)
        return result

    @staticmethod
    def search_for_country(country_category, films):
        """Filter *films* by country; "Other" means neither Russia nor the US."""
        result = list()
        if country_category == "Other":
            for i in films:
                if "Russia" not in get_country(i) and "United States" not in get_country(i):
                    result.append(i)
        else:
            for i in films:
                for j in get_country(i):
                    if j == country_category:
                        result.append(i)
        return result

    @staticmethod
    def search_for_genre(genre, films):
        """Films whose genre list contains *genre*."""
        result = list()
        for i in films:
            for j in get_genres(i):
                if j == genre:
                    result.append(i)
        return result

    """Return a movie or a list of movies as Movie objects"""

    def present_movie(self, start_year, end_year, country_category, genre):
        # Chain the three filters, then hand off to the presentation helper.
        films = self.search_for_years(start_year, end_year)
        films = self.search_for_country(country_category, films)
        films = self.search_for_genre(genre, films)
        return create_film_list(films)
import os
import sys
from imdb import IMDb
import numpy as np
import requests
import webbrowser
from contextlib import closing
from bs4 import BeautifulSoup
from selenium.webdriver import Firefox  # pip install selenium
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
import urllib.request
import cv2

ia = IMDb()


def get_image(link):
    """Open *link* in a headless browser, download the fourth gallery image
    (class 'pswp__img') and display it with OpenCV.

    Fixes vs original: the browser is now closed in a finally block so it is
    not leaked when page load / scripting fails, and the duplicate
    `import numpy as np` was dropped.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get(link)  # load the web page
        innerHTML = driver.execute_script("return document.body.innerHTML")
        parsed = BeautifulSoup(innerHTML, "html.parser")
    finally:
        driver.close()  # always release the browser process
    mediastrip = parsed.find_all('img', class_='pswp__img')
    imgtags = mediastrip[3]
    imgurl = imgtags['src']
    resp = urllib.request.urlopen(imgurl)
    # Decode the raw bytes into a BGR image for display.
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    cv2.imshow("Image", image)
main = Blueprint('main', __name__)

# local - use local mysql db
local = False
# enable_extra - loads poster and plot overview from tmdb for movie info
enable_extra = True
# load posters on the profile page
posters_on_profile_page = False
tmdb_img_url = r'https://image.tmdb.org/t/p/w342'

# Pick the IMDb backend: local MySQL mirror vs live web access.
ia = IMDb('s3', 'mysql+mysqldb://may:venom123@localhost/imdb') if local else IMDb()


def db_fav_exists(tconst, user_id):
    """
    checks if the tconst exists as a favorite
    for the user of user id `user_id`
    """
    wanted = int(tconst)
    favorites = fav.query.filter_by(user_id=user_id).all()
    # True as soon as any stored favorite carries the same tconst.
    return any(row.tconst == wanted for row in favorites)
# (fragment: tail of a scraping loop over a previous list page; `line` and
#  `result` are defined outside this view)
m = re.search(r'data-tconst="tt(\d+?)">', line)
if m:
    id = m.group(1)
    result.append(id)

top250_url = "https://www.imdb.com/list/ls005197923/?sort=list_order,asc&st_dt=&mode=detail&page=3"
r = requests.get(top250_url)
html = r.text.split("\n")
for line in html:
    line = line.rstrip("\n")
    # Pull the numeric IMDb id out of each data-tconst attribute.
    m = re.search(r'data-tconst="tt(\d+?)">', line)
    if m:
        id = m.group(1)
        result.append(id)

# Deduplicate ids collected across pages.
result = set(result)
print(len(result))
ia = IMDb()
import inspect
#print(inspect.getmembers(ia))
with open('somefile.txt', 'a') as the_file:
    for v in result:
        st = []
        x = ia.get_movie(v)
        # print(x)
        y = x.get('rating')
        z = x.get('title')
        ac = x['cast']
        print(y)
        print(z)
        # Only record titles with a sizeable cast.
        if len(ac) >= 15:
            y = str(y)
            the_file.write(z)
from bs4 import BeautifulSoup
import requests
import re
from imdb import IMDb
import json
from textblob import TextBlob

ia = IMDb()  # create an imdb instance

url = "https://www.filmsite.org/boxoffice.html"  # url from which the names of the top grossing movies are scraped from
url_request = requests.get(url)
soup = BeautifulSoup(url_request.content, 'html.parser')  # gets content
scrape = soup.find_all("li")  # saving all li tags

data = []
for names in scrape:  # saving just the text from the tags
    data.append(names.text)

# maybe combine lines 20 till 38 maybe in a function see if you can do a sapply like thing
titles = []
for x in data:  # from the text, saving only the movie titles
    # Titles are identified by a trailing 4-digit year in the text.
    if re.match(r">?.*\d{4}", x, re.DOTALL):
        titles.append(x)

clean = []
# (loop continues beyond this view -- truncated here)
for title in titles:  # cleaning the movie title names
    title = re.sub(r"Filmsite.org", "", title)
    title = re.sub(r"\r|\n", "", title)
def __init__(self) -> None:
    """Create the IMDb client and fill self.films from the local pickle cache."""
    self.films = []
    self.app = IMDb()
    # Regenerate the on-disk cache with self.save_file() when film.pkl is stale.
    self.load_file()
import pandas as pd # load ratings df_ratings = pd.read_csv('../data/Personal_Movie_Ratings_Updated.csv') # drop movies missing ratings df_ratings = df_ratings.loc[~df_ratings['My Rating'].isnull(), :].reset_index( drop=True) # load UIDs for movies that do not match search results df_uid = pd.read_csv('../data/Personal_Movies_Missing_UID_Updated.csv', dtype={'UID': str}) df_ratings = df_ratings.merge(df_uid, how='left', validate='1:1') # create an instance of the IMDb class ia = IMDb() # initialize results container results = { 'UID': [], 'My_Rating': [], 'Budget': [], 'Cumulative_Worldwide_Gross': [], 'Production_Company1': [], 'Synopsis': [], 'Top_250_Rank': [], 'Country1': [], 'Director1': [], 'Genre1': [], 'Genre2': [], 'Genre3': [],
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This script is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this script. If not, see http://www.gnu.org/licenses/.
"""

from imdb import IMDb
import pandas as pd
import re

imdb_access = IMDb()

movie_data = pd.read_csv("movies.csv")

# Grab only the movie number out of the IMDB URL
movie_data["Movie_Number"] = movie_data["IMDB_URL"].apply(
    lambda x: re.sub("[^0-9]", "", x))

# NOTE(review): the file is opened "wb" but written str -- valid on Python 2,
# a TypeError on Python 3; confirm the target interpreter.
# (loop body continues beyond this view -- truncated here)
with open("film-death-counts-Python.csv", "wb") as out_file:
    out_file.write(
        "Film,Year,Body_Count,MPAA_Rating,Genre,Director,Actors,Length_Minutes,IMDB_Rating\n"
    )
    for movie_entry in movie_data.iterrows():
        # Use a try-catch on the loop to prevent temporary connection-related issues from stopping the scrape
        try:
            movie = imdb_access.get_movie(movie_entry[1]["Movie_Number"])
# (fragment: tail of an info-printing routine; `soup` and `ops` are defined
#  outside this view.  User-facing strings are Portuguese -- left untouched.)
info = soup.find('div', class_='subtext')
print('Restrição de idade: ' + info.contents[0].strip())
try:
    print('Duração: ' + info.time.string.strip())
except AttributeError:
    pass
print('Géneros: ' + ', '.join([g.string for g in info.findAll('a')[:-1]]))
print('Data de estreia: ' + info.findAll('a')[-1].string)

# Command-line dispatch on the collected option flags.
if '-b' in ops:
    build()

if '-s' in ops:
    # Search a film by name and show its IMDb info page.
    movie = input('Nome do filme: ')
    ia = IMDb()
    results = ia.search_movie(movie)
    mv = results[0]
    URL = ia.get_imdbURL(mv)
    info_movie(URL)

if '-a' in ops:
    # Scrape and sentiment-analyse a full film script.
    movies_db = build_movies_db()
    choosen_movie = get_movie_url(movies_db)
    if choosen_movie:
        FULL_SCRIPT = scrap_full_script(choosen_movie)
        FULL_SCRIPT_CLEAN = cleaning_data(FULL_SCRIPT)
        along_script_sent(FULL_SCRIPT_CLEAN)
    else:
        print('Não foram encontrados resultados.')
from imdb import IMDb

# Build the IMDb data-access object.
imdb_client = IMDb()

# Retrieve "The Matrix" by id and list everyone credited as director.
movie = imdb_client.get_movie('0133093')
for person in movie['directors']:
    print(person['name'])

# Keys currently populated on the Movie object.
print(sorted(movie.keys()))

# Every info set that could be fetched for a movie.
print(imdb_client.get_movie_infoset())

# Pull in the extra 'technical' info set...
imdb_client.update(movie, ['technical'])
# ...see which keys it added...
print(movie.infoset2keys['technical'])
# ...and print one of them.
print(movie.get('tech'))
from imdb import IMDb
import sys
import json

# create an instance of the IMDb class
ia = IMDb()

# IMDb id passed on the command line, e.g. `python keywords.py 0133093`
imdbId = str(sys.argv[1])

# get movie keywords (fetch only the 'keywords' info set)
movie = ia.get_movie(imdbId, info=['keywords'])

# for keyword in movie['keywords']:
#     print(keyword)

# Emit the keywords as JSON on stdout.
# BUG FIX: `print json.dumps(...)` was Python 2 statement syntax and is a
# SyntaxError on Python 3; use the print() function like the rest of the file.
print(json.dumps(movie['keywords']))
import urllib

from imdb import IMDb

# Fetch "The Matrix" by its IMDb id and show who directed it.
oa = IMDb()
matrix_record = oa.get_movie('0133093')
print(matrix_record['director'])
__author__ = 'abhishekchoudhary'
# NOTE: Python 2 script (print statements).
from imdb import IMDb

ia = IMDb()

# Fetch "The Matrix" by id and show its director entry.
the_matrix = ia.get_movie('0133093')
print the_matrix['director']

# List every person matching the name search, with their IMDb person id.
for person in ia.search_person('Salman Khan'):
    print person.personID, person['name']