def test_get_listens_df(self):
    """Check the dataframe and metadata produced by ``get_listens_df``.

    The input is read from ``RECOMMENDATION_RECORDING_MAPPED_LISTENS``; the
    expected figures below are tied to whatever data is stored there.
    """
    expected_rows = 24
    expected_columns = ['recording_mbid', 'user_id']

    stats = {}
    source_df = utils.read_files_from_HDFS(RECOMMENDATION_RECORDING_MAPPED_LISTENS)
    result_df = create_dataframes.get_listens_df(source_df, stats)

    row_total = result_df.count()
    self.assertEqual(row_total, expected_rows)
    # assertCountEqual ignores ordering: only the column names are compared.
    self.assertCountEqual(expected_columns, result_df.columns)
    # The dict handed to get_listens_df must come back with the row count in it.
    self.assertEqual(stats['listens_count'], expected_rows)
 def test_get_listens_df(self):
     """Check the dataframe and metadata produced by ``get_listens_df``.

     The input is read from ``self.mapped_listens_path``; the expected
     figures below are tied to whatever data is stored there.
     """
     expected_rows = 8
     expected_columns = ['mb_recording_mbid', 'user_name']

     stats = {}
     source_df = utils.read_files_from_HDFS(self.mapped_listens_path)
     result_df = create_dataframes.get_listens_df(source_df, stats)

     row_total = result_df.count()
     self.assertEqual(row_total, expected_rows)
     # assertListEqual is order-sensitive: the columns must appear exactly so.
     self.assertListEqual(expected_columns, result_df.columns)
     # The dict handed to get_listens_df must come back with the row count in it.
     self.assertEqual(stats['listens_count'], expected_rows)
    def test_save_playcounts_df(self):
        """Save the playcounts dataframe, read it back and verify it.

        Users, recordings and listens dataframes are all derived from the
        mapped listens stored at ``RECOMMENDATION_RECORDING_MAPPED_LISTENS``.
        The expected figures below are tied to whatever data is stored there.
        """
        expected_rows = 20
        expected_columns = ['spark_user_id', 'recording_id', 'count']
        save_path = RECOMMENDATION_RECORDING_PLAYCOUNTS_DATAFRAME

        source_df = utils.read_files_from_HDFS(RECOMMENDATION_RECORDING_MAPPED_LISTENS)
        # The helpers below receive throwaway dicts; only the dict passed to
        # save_playcounts_df is inspected by this test.
        users = create_dataframes.get_users_dataframe(
            source_df, {}, RECOMMENDATION_RECORDING_USERS_DATAFRAME)
        recordings = create_dataframes.get_recordings_df(
            source_df, {}, RECOMMENDATION_RECORDINGS_DATAFRAME)
        listens = create_dataframes.get_listens_df(source_df, {})

        stats = {}
        create_dataframes.save_playcounts_df(listens, recordings, users, stats, save_path)

        written_df = utils.read_files_from_HDFS(save_path)
        written_rows = written_df.count()
        self.assertEqual(written_rows, expected_rows)
        # Order-sensitive comparison of the saved column names.
        self.assertListEqual(expected_columns, written_df.columns)
        self.assertEqual(stats['playcounts_count'], expected_rows)
    def test_save_playcounts_df(self):
        """Save the playcounts dataframe, read it back and verify it.

        Users, recordings and listens dataframes are all derived from the
        mapped listens stored at ``self.mapped_listens_path``. The result is
        read back from ``path.PLAYCOUNTS_DATAFRAME_PATH`` and checked for row
        count, column layout and the ``playcounts_count`` metadata entry.
        """
        metadata = {}
        mapped_listens = utils.read_files_from_HDFS(self.mapped_listens_path)
        # The helpers below receive throwaway dicts; only the dict passed to
        # save_playcounts_df is inspected by this test.
        users_df = create_dataframes.get_users_dataframe(mapped_listens, {})
        recordings_df = create_dataframes.get_recordings_df(mapped_listens, {})
        listens_df = create_dataframes.get_listens_df(mapped_listens, {})

        create_dataframes.save_playcounts_df(listens_df, recordings_df, users_df, metadata)
        playcounts_df = utils.read_files_from_HDFS(path.PLAYCOUNTS_DATAFRAME_PATH)

        # Evaluate count() once and reuse it: on a Spark DataFrame each call
        # is a separate action, and the dataframe does not change in between.
        playcounts_count = playcounts_df.count()
        self.assertEqual(playcounts_count, 5)

        self.assertListEqual(['user_id', 'recording_id', 'count'], playcounts_df.columns)
        self.assertEqual(metadata['playcounts_count'], playcounts_count)
Esempio n. 5
0
    def test_save_playcounts_df(self):
        """Save the playcounts dataframe, read it back and verify it.

        Users, recordings and listens dataframes are all derived from the
        mapped listens stored at ``self.mapped_listens_path``. The playcounts
        are written to ``self.playcounts_path`` with a threshold of 0, then
        read back from that same path and checked for row count, column
        layout and the ``playcounts_count`` metadata entry.
        """
        metadata = {}
        mapped_listens = utils.read_files_from_HDFS(self.mapped_listens_path)
        # The helpers below receive throwaway dicts; only the dict passed to
        # save_playcounts_df is inspected by this test.
        users_df = create_dataframes.get_users_dataframe(
            mapped_listens, {}, self.users_path)
        recordings_df = create_dataframes.get_recordings_df(
            mapped_listens, {}, self.recordings_path)
        listens_df = create_dataframes.get_listens_df(mapped_listens, {})

        threshold = 0
        create_dataframes.save_playcounts_df(listens_df, recordings_df,
                                             users_df, threshold, metadata,
                                             self.playcounts_path)
        # Read back from the exact location that was just written, so the
        # assertions cannot be satisfied by data left at some other path.
        playcounts_df = utils.read_files_from_HDFS(self.playcounts_path)

        # Evaluate count() once and reuse it: on a Spark DataFrame each call
        # is a separate action, and the dataframe does not change in between.
        playcounts_count = playcounts_df.count()
        self.assertEqual(playcounts_count, 5)

        self.assertListEqual(['user_id', 'recording_id', 'count'],
                             playcounts_df.columns)
        self.assertEqual(metadata['playcounts_count'], playcounts_count)