def empty_column(self, column):
    """
    Report whether a column holds no values.

    A column counts as empty when no row has a non-``None`` value in it;
    a table without any rows therefore also yields ``True``.

    `Args:`
        column: str
            The column name
    `Returns:`
        bool
            ``True`` if the column is empty, ``False`` otherwise.
    """
    # Keep only the rows that actually carry a value in the column ...
    populated_rows = petl.selectnotnone(self.table, column)
    # ... and the column is empty exactly when none are left.
    return petl.nrows(populated_rows) == 0
# Debug aids -- uncomment to inspect the intermediate joined tables:
# print(directorsAndGenresAndMovies)
# print(actorsAndRoles)
# print(moviesAndGenresAndDirectorsAndRoles)

# Each warehouse table is built as one pipeline: keep only the needed
# columns, qualify generic ``id`` columns with their table name, then
# drop the duplicate rows that cutting columns leaves behind.

# ranks: de-duplicated (movie, rank, director, actor) rows; rows whose
# rank is NULL are discarded.
ranks = etl.selectnotnone(
    etl.distinct(
        etl.cut(
            moviesAndDirectorsAndRoles,
            'movie_id', 'rank', 'director_id', 'actor_id',
        )
    ),
    'rank',
)

# movies: one row per distinct (movie_id, name) pair.
movies = etl.distinct(
    etl.cut(moviesAndDirectorsAndRoles, 'movie_id', 'name')
)

# directors: id is renamed to director_id before de-duplicating.
directors = etl.distinct(
    etl.rename(
        etl.cut(directorsAndMovies, 'id', 'full_name'),
        'id', 'director_id',
    )
)

# actors: id is renamed to actor_id before de-duplicating.
actors = etl.distinct(
    etl.rename(
        etl.cut(actors, 'id', 'full_name'),
        'id', 'actor_id',
    )
)

# Write the final tables into the data warehouse, in the same order as
# before: ranks, movies, actors, directors.
for _final_table, _target_name in (
    (ranks, 'ranks'),
    (movies, 'movies'),
    (actors, 'actors'),
    (directors, 'directors'),
):
    etl.todb(_final_table, imdbWarehouse, _target_name)