def extract_acts_by_person(person, db_uri=None):
    """Extracts movies or tv shows the passed *person* played in.

    Fetches the person's page, walks the title ids that ``parser.roles``
    finds in it and registers *person* as a cast member of every title that
    is already cached or can be built and stored on the fly.

    :param person: person model; its ``id`` attribute is appended to the
        name URL to fetch the page.
    :param db_uri: database URI forwarded to the cache lookup, the model
        builder and the store helper (defaults to ``None``).
    """
    person_source = process_url(u'http://www.imdb.com/name/' + person.id)
    for title_id in parser.roles(person_source):
        title = get_cached_model(title_id, db_uri)
        if title is None:
            # We don't have the movie or tv show cached; index it first.
            source = process_url(
                urlparse.urljoin(constants.url_base, 'title/' + title_id))
            new_title = factory.model_builder(source, db_uri)
            if new_title and store_model(new_title, db_uri):
                # Fetch the freshly stored title back from the cache.
                title = get_cached_model(title_id, db_uri)
        if title is not None:
            # NOTE(review): store_cast_member's signature is not visible from
            # here, so db_uri is not forwarded to it -- confirm whether it
            # accepts one and pass it along if so.
            store_cast_member(title, person)
def models_from_source(source, db_uri=None):
    """Yields a model for every model id ``parser.model_ids`` finds in *source*.

    Cached models are yielded as they are. For any other id the model page
    is fetched, a model is built and stored, and the result of looking it up
    in the cache again is yielded. Ids without a model URL, pages that build
    no model, and models that fail to store are skipped.

    :param source: page source handed to ``parser.model_ids``.
    :param db_uri: database URI forwarded to the cache, builder and store
        helpers (defaults to ``None``).
    """
    for model_id in parser.model_ids(source):
        # Cache hit: nothing to fetch or build.
        hit = get_cached_model(model_id, db_uri)
        if hit:
            yield hit
            continue

        # Cache miss: build the model from its page, store it, then yield
        # whatever the cache returns for it.
        url = model_url(model_id)
        if url is None:
            continue
        built = factory.model_builder(process_url(url), db_uri)
        if not built:
            continue
        if not store_model(built, db_uri):
            continue
        yield get_cached_model(model_id, db_uri)
def models_from_json(json_data, db_uri=None):
    """Parses the passed search result *json_data* and extracts the models contained in it.

    The entries are read from ``json_data['d']`` and the query from
    ``json_data['q']``. Nothing is yielded when *json_data* is empty or has
    no entries.

    :param json_data: mapping with the result entries under ``'d'`` (each
        entry providing an ``'id'``) and the query under ``'q'``.
    :param db_uri: database URI forwarded to the cache, builder and store
        helpers (defaults to ``None``).
    """
    entries = json_data.get('d') if json_data else None
    if not entries:
        return

    query = json_data.get('q')
    for entry in entries:
        model_id = entry['id']

        # Cache hit: record the search result against the cached model.
        hit = get_cached_model(model_id, db_uri)
        if hit:
            store_search_result(query, hit, db_uri)
            yield hit
            continue

        url = model_url(model_id)
        if url is None:
            continue
        built = factory.model_builder(process_url(url), db_uri)
        if not built:
            continue
        # NOTE(review): the search result is recorded before the model itself
        # is stored (and even when storing fails) -- confirm this ordering is
        # intended.
        store_search_result(query, built, db_uri)
        if store_model(built, db_uri):
            yield get_cached_model(model_id, db_uri)
def test_person_builder(self):
    """The builder turns the ``morgan_freeman.html`` fixture into a ``models.Person``."""
    fixture = os.path.join(self.here, 'files', 'morgan_freeman.html')
    with open(fixture) as f:
        person_source = f.read()
    # Pass the test database URI, as the tv show and movie builder tests do.
    # NOTE(review): assumes self.db_uri is set on this test class -- confirm.
    person = factory.model_builder(person_source, self.db_uri)
    assert isinstance(person, models.Person)
def test_tvshow_builder(self):
    """The builder turns the ``black_mirror.html`` fixture into a ``models.TVShow``."""
    fixture = os.path.join(self.here, 'files', 'black_mirror.html')
    with open(fixture) as f:
        tvshow_source = f.read()
    tvshow = factory.model_builder(tvshow_source, self.db_uri)
    # isinstance() already returns a bool; no comparison against True needed.
    assert isinstance(tvshow, models.TVShow)
def test_movie_builder(self):
    """The builder turns the ``the_matrix.html`` fixture into a ``models.Movie``."""
    fixture = os.path.join(self.here, 'files', 'the_matrix.html')
    with open(fixture) as f:
        movie_source = f.read()
    movie = factory.model_builder(movie_source, self.db_uri)
    # isinstance() already returns a bool; no comparison against True needed.
    assert isinstance(movie, models.Movie)