def test_get_listens_df(self):
    """Verify ``get_listens_df`` against the mapped-listens fixture.

    Reads the mapped listens from ``RECOMMENDATION_RECORDING_MAPPED_LISTENS``
    and checks the row count of the resulting dataframe, its columns, and
    the ``listens_count`` entry expected in the metadata dict passed in.
    """
    expected_rows = 24
    expected_columns = ['recording_mbid', 'user_id']
    run_metadata = {}

    mapped_listens_df = utils.read_files_from_HDFS(RECOMMENDATION_RECORDING_MAPPED_LISTENS)
    result_df = create_dataframes.get_listens_df(mapped_listens_df, run_metadata)

    self.assertEqual(result_df.count(), expected_rows)
    # assertCountEqual compares the elements regardless of their order.
    self.assertCountEqual(expected_columns, result_df.columns)
    self.assertEqual(run_metadata['listens_count'], expected_rows)
def test_get_listens_df(self):
    """Verify ``get_listens_df`` against the listens at ``self.mapped_listens_path``.

    Checks the row count of the resulting dataframe, the exact column
    order, and the ``listens_count`` entry expected in the metadata dict
    passed in.
    """
    expected_rows = 8
    expected_columns = ['mb_recording_mbid', 'user_name']
    run_metadata = {}

    mapped_listens_df = utils.read_files_from_HDFS(self.mapped_listens_path)
    result_df = create_dataframes.get_listens_df(mapped_listens_df, run_metadata)

    self.assertEqual(result_df.count(), expected_rows)
    # assertListEqual is order-sensitive: the column order is checked too.
    self.assertListEqual(expected_columns, result_df.columns)
    self.assertEqual(run_metadata['listens_count'], expected_rows)
def test_save_playcounts_df(self):
    """Verify ``save_playcounts_df`` end to end on the mapped-listens fixture.

    Builds the users, recordings and listens dataframes from the mapped
    listens, saves the playcounts to
    ``RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME``, then reads them back
    and checks the row count, the column order and the ``playcounts_count``
    entry expected in the metadata dict.
    """
    expected_rows = 20
    expected_columns = ['spark_user_id', 'recording_id', 'count']
    run_metadata = {}

    mapped_listens_df = utils.read_files_from_HDFS(RECOMMENDATION_RECORDING_MAPPED_LISTENS)

    # The helper dataframes each get a throwaway metadata dict; only the
    # dict handed to save_playcounts_df is inspected below.
    users = create_dataframes.get_users_dataframe(
        mapped_listens_df,
        {},
        RECOMMENDATION_RECORDING_USERS_DATAFRAME,
    )
    recordings = create_dataframes.get_recordings_df(
        mapped_listens_df,
        {},
        RECOMMENDATION_RECORDINGS_DATAFRAME,
    )
    listens = create_dataframes.get_listens_df(mapped_listens_df, {})

    create_dataframes.save_playcounts_df(
        listens,
        recordings,
        users,
        run_metadata,
        RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME,
    )

    saved_playcounts = utils.read_files_from_HDFS(RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME)
    self.assertEqual(saved_playcounts.count(), expected_rows)
    self.assertListEqual(expected_columns, saved_playcounts.columns)
    self.assertEqual(run_metadata['playcounts_count'], expected_rows)
def test_save_playcounts_df(self):
    """Verify ``save_playcounts_df`` using the listens at ``self.mapped_listens_path``.

    Builds the users, recordings and listens dataframes, saves the
    playcounts, then reads them back from ``path.PLAYCOUNTS_DATAFRAME_PATH``
    and checks the row count, the column order and that the
    ``playcounts_count`` metadata entry matches the saved dataframe's count.
    """
    expected_rows = 5
    expected_columns = ['user_id', 'recording_id', 'count']
    run_metadata = {}

    mapped_listens_df = utils.read_files_from_HDFS(self.mapped_listens_path)

    # The helper dataframes each get a throwaway metadata dict; only the
    # dict handed to save_playcounts_df is inspected below.
    users = create_dataframes.get_users_dataframe(mapped_listens_df, {})
    recordings = create_dataframes.get_recordings_df(mapped_listens_df, {})
    listens = create_dataframes.get_listens_df(mapped_listens_df, {})

    create_dataframes.save_playcounts_df(listens, recordings, users, run_metadata)

    saved_playcounts = utils.read_files_from_HDFS(path.PLAYCOUNTS_DATAFRAME_PATH)
    self.assertEqual(saved_playcounts.count(), expected_rows)
    self.assertListEqual(expected_columns, saved_playcounts.columns)
    # Compared against a fresh count of the saved dataframe rather than
    # against the literal above.
    self.assertEqual(run_metadata['playcounts_count'], saved_playcounts.count())
def test_save_playcounts_df(self):
    """Verify ``save_playcounts_df`` with a threshold of 0.

    Builds the users, recordings and listens dataframes from the listens at
    ``self.mapped_listens_path``, saves the playcounts to
    ``self.playcounts_path``, then reads playcounts back and checks the row
    count, the column order and that the ``playcounts_count`` metadata
    entry matches the loaded dataframe's count.
    """
    expected_rows = 5
    expected_columns = ['user_id', 'recording_id', 'count']
    run_metadata = {}

    mapped_listens_df = utils.read_files_from_HDFS(self.mapped_listens_path)

    # The helper dataframes each get a throwaway metadata dict; only the
    # dict handed to save_playcounts_df is inspected below.
    users = create_dataframes.get_users_dataframe(mapped_listens_df, {}, self.users_path)
    recordings = create_dataframes.get_recordings_df(mapped_listens_df, {}, self.recordings_path)
    listens = create_dataframes.get_listens_df(mapped_listens_df, {})

    threshold = 0
    create_dataframes.save_playcounts_df(
        listens,
        recordings,
        users,
        threshold,
        run_metadata,
        self.playcounts_path,
    )

    # NOTE(review): the dataframe is saved to self.playcounts_path but read
    # back from path.RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME --
    # presumably both refer to the same location; confirm against setUp.
    saved_playcounts = utils.read_files_from_HDFS(path.RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME)
    self.assertEqual(saved_playcounts.count(), expected_rows)
    self.assertListEqual(expected_columns, saved_playcounts.columns)
    # Compared against a fresh count of the loaded dataframe rather than
    # against the literal above.
    self.assertEqual(run_metadata['playcounts_count'], saved_playcounts.count())