Ejemplo n.º 1
0
    def empty_column(self, column):
        """
        Report whether a column holds no non-``None`` values.

        The rows of ``self.table`` whose value in ``column`` is not ``None``
        are selected and counted; the column is considered empty when that
        count is zero.

        `Args:`
            column: str
                The name of the column to inspect
        `Returns:`
            bool
                ``True`` if no row has a non-``None`` value in the column,
                ``False`` otherwise
        """
        populated_rows = petl.selectnotnone(self.table, column)
        return petl.nrows(populated_rows) == 0
Ejemplo n.º 2
0
# print(directorsAndGenresAndMovies)
# print(actorsAndRoles)
# print(moviesAndGenresAndDirectorsAndRoles)

# Build each warehouse table as one pipeline: keep only the needed columns,
# then clean up the result. petl views are lazy, so nothing is evaluated
# until the tables are written out below.

# ranks: drop duplicate rows, then drop rows whose 'rank' is None
ranks = etl.selectnotnone(
    etl.distinct(
        etl.cut(moviesAndDirectorsAndRoles,
                'movie_id', 'rank', 'director_id', 'actor_id')
    ),
    'rank',
)

# movies: id/name pairs, de-duplicated after the cut
movies = etl.distinct(etl.cut(moviesAndDirectorsAndRoles, 'movie_id', 'name'))

# directors: rename 'id' so the column carries the table name, de-duplicate
directors = etl.distinct(
    etl.rename(etl.cut(directorsAndMovies, 'id', 'full_name'),
               'id', 'director_id')
)

# actors: same treatment as directors
actors = etl.distinct(
    etl.rename(etl.cut(actors, 'id', 'full_name'), 'id', 'actor_id')
)

# Load the final tables into the data warehouse (order: ranks, movies,
# actors, directors)
etl.todb(ranks, imdbWarehouse, 'ranks')
etl.todb(movies, imdbWarehouse, 'movies')
etl.todb(actors, imdbWarehouse, 'actors')
etl.todb(directors, imdbWarehouse, 'directors')