def main(paths=None):
    """Run the load -> clean -> analyse pipeline for the BX book data.

    Reads the three BX CSV files through ``DataLoader``, passes each one
    through ``DataClean.execute_pipeline_cleaning`` and hands the cleaned
    results to ``DataAnalysis.execute_pipeline_data_analysis``.

    Args:
        paths: Location of the ``paths.yml`` configuration file given to
            ``DataLoader.read_paths``. When omitted, the original
            hard-coded location is used, so ``main()`` behaves as before.

    Returns:
        Whatever ``DataAnalysis.execute_pipeline_data_analysis`` returns
        for the cleaned ratings, users and books (named
        ``ratings_pivoted`` by the original author).
    """
    if paths is None:
        # Historical default, kept so existing no-argument callers still work.
        paths = "/home/nikoscf/PycharmProjects/BookRecommendation/configurations/paths.yml"

    load_begin = DataLoader()
    load_begin.read_paths(paths)

    # One-time setup (original author's note): uncomment the call below to
    # unzip the downloaded archive into the absolute directory configured in
    # paths.yml; it then checks for the .csv files and removes the redundant
    # zip folder.
    # load_begin.check_zip_and_csv()

    books = load_begin.read_data("BX-Books.csv")
    users = load_begin.read_data("BX-Users.csv")
    ratings = load_begin.read_data("BX-Book-Ratings.csv")

    # Each cleaning call receives (columns to drop, numeric_col_to_nan columns);
    # presumably the second list names columns coerced to numeric with
    # invalid entries becoming NaN -- confirm against DataClean.
    clean_books = DataClean(books).execute_pipeline_cleaning(
        ['Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
        ["Year-Of-Publication"],
    )
    clean_users = DataClean(users).execute_pipeline_cleaning(
        [],
        ["User-ID", "Age"],
    )
    clean_ratings = DataClean(ratings).execute_pipeline_cleaning(
        [],
        ["User-ID", "ISBN", "Book-Rating"],
    )

    data_analysis = DataAnalysis()
    ratings_pivoted = data_analysis.execute_pipeline_data_analysis(
        clean_ratings, clean_users, clean_books)
    return ratings_pivoted