def process_imdb():
    """
    Build and store the processed IMDb artefacts.

    Downloads the IMDb titles and ratings files, hands both to
    ``hf.clean_imdb_data`` and saves the resulting dataframe in
    ``PROCESSED_DIR`` under ``IMDB_FILE_NAME`` with the ``CSV_EXT`` extension.
    The unique genres obtained from that dataframe are then saved in the same
    directory under ``GENRES_FILE_NAME`` with the ``PKL_EXT`` extension.
    """
    # Fetch the raw inputs (titles first, then ratings).
    raw_titles, raw_ratings = (
        hf.download_gz_file(IMDB_TITLES_URL),
        hf.download_gz_file(IMDB_RATINGS_URL),
    )

    # Clean/merge the two inputs and persist the combined dataframe.
    imdb_frame = hf.clean_imdb_data(raw_titles, raw_ratings)
    hf.save_file(imdb_frame, PROCESSED_DIR, IMDB_FILE_NAME, CSV_EXT)

    # Extract the unique genres from the stored dataframe and persist them too.
    unique_genres = hf.get_unique_genres(imdb_frame)
    hf.save_file(unique_genres, PROCESSED_DIR, GENRES_FILE_NAME, PKL_EXT)
def process_netflix():
    """
    Build and store the processed Netflix artefacts.

    Downloads the Netflix dataset into ``NF_DIRECTORY``, parses every file
    listed in ``LIST_NF_FILES`` into a single dataframe, and saves the
    recommendation dictionary computed from it. The movie titles file is then
    formatted and saved, and finally the downloaded ``NF_DIRECTORY`` is
    removed.
    """
    # Fetch the raw dataset into NF_DIRECTORY.
    hf.download_netflix_data(NF_KAGGLE_USER, NF_DIRECTORY)

    # Parse each listed file in order and gather all parsed rows in one list.
    parsed_rows = [
        row
        for nf_file in LIST_NF_FILES
        for row in hf.parse_data(os.path.join(NF_DIRECTORY, nf_file))
    ]
    netflix_frame = pd.DataFrame(parsed_rows, columns=DF_NF_COLS)

    # Compute the movie recommendation dictionary and persist it.
    recommendations = hf.get_recommended_movies(netflix_frame)
    hf.save_file(recommendations, PROCESSED_DIR, DICT_NAME, PKL_EXT)

    # Format the movie titles file and persist the result.
    titles_frame = hf.format_movie_titles(TITLES_PATH)
    hf.save_file(titles_frame, PROCESSED_DIR, TITLE_FILE_NAME, CSV_EXT)

    # The original download directory is no longer needed once outputs exist.
    shutil.rmtree(NF_DIRECTORY)
acquisition_type = acquisition_data.neurodata_type series_name = stim_names[acq_i] stimulus_data = input_file.stimulus.get(series_name) acq_copy_func = acquisition_copy_functions[acquisition_type] stim_copy_func = stimulus_copy_functions[acquisition_type] output_acq = acq_copy_func(nwbfile, acquisition_data, series_name, electrode) output_stim = stim_copy_func(nwbfile, acquisition_data, stimulus_data, series_name, electrode) nwbfile.add_acquisition(output_acq) nwbfile.add_stimulus(output_stim) stim_ind += 1 save_file(base_dir + output_filename, nwbfile) i_io.close() ## Print data of the new file for testing purposes for acq_name, stim_name in zip(nwbfile.acquisition.keys(), nwbfile.stimulus.keys()): print(f'{acq_name} {stim_name}') acq_desc = json.loads(nwbfile.acquisition[acq_name].description) stim_desc = json.loads(nwbfile.stimulus[stim_name].description) for key in acq_desc.keys(): print(f'{acq_desc[key]} {stim_desc[key]}')