Ejemplo n.º 1
0
def with_pre_load_data_set():
    """Run the metadata experiments over the full song set.

    Steps, in order:

    1. Process every entry of ``METADATA_TO_PROCESS_LIST`` on its own; each
       run is labelled with the matching entry of
       ``METADATA_TO_PROCESS_LIST_PT``.
    2. Process album and title as two separate columns (``'|AL|+|TL|'``).
    3. Process album and title concatenated into one ``'AL+TL'`` column.
    4. Print the song/user statistics and generate the graphics.
    """
    logger = logging.getLogger(__name__)
    song_set_df = get_song_set_df()
    # NOTE(review): the other entry points in this file pass the song set to
    # get_users_preference_df() and song_df= to PreferenceAnalytics; both are
    # omitted here -- confirm that relying on their defaults is intended.
    preference_statistic = PreferenceAnalytics(
        users_preferences_df=get_users_preference_df()
    )
    preference_statistic.run()

    separator = "*" * 60
    for metadata, pt_graph_name in zip(METADATA_TO_PROCESS_LIST,
                                       METADATA_TO_PROCESS_LIST_PT):
        logger.info(separator)
        # Lazy %-args: same emitted text as the previous concatenation.
        logger.info("*\tProcessando o metadado - %s", metadata)
        logger.info(separator)
        one_metadata_process(
            song_set_df=song_set_df.filter(['id', metadata], axis=1),
            users_preferences_df=preference_statistic.get_users_relevance_preferences_df(
                user_top_n_relevance=USER_SIZE),
            preference_statistic=preference_statistic,
            label=pt_graph_name)

    # Album and title kept as two separate columns.
    one_metadata_process(
        song_set_df=song_set_df.filter(['id', 'album', 'title'], axis=1),
        users_preferences_df=preference_statistic.get_users_relevance_preferences_df(
            user_top_n_relevance=USER_SIZE),
        preference_statistic=preference_statistic,
        label='|AL|+|TL|')

    # Album and title merged into a single column. The label now matches the
    # column that is actually built ('AL+TL'); it previously read 'AL+AR'
    # although no artist data is involved in this run.
    one_metadata_process(
        song_set_df=concat_metadata_preserve_id(
            df_list=song_set_df,
            metadata_to_process_list=['album', 'title'],
            new_column='AL+TL'),
        users_preferences_df=preference_statistic.get_users_relevance_preferences_df(
            user_top_n_relevance=USER_SIZE),
        preference_statistic=preference_statistic,
        label='AL+TL')

    preference_statistic.print_song_statistical()
    preference_statistic.print_user_statistical()
    preference_statistic.make_graphics()
    make_evaluate_graphics()
def data_analysis():
    """Build the preference analytics for the whole song set and report them.

    Loads the song set, derives the user preferences from it, runs a
    ``PreferenceAnalytics`` over both, then prints the song and user
    statistics and generates the graphics.
    """
    songs = get_song_set_df()
    users_preferences = get_users_preference_df(songs)
    analytics = PreferenceAnalytics(users_preferences_df=users_preferences,
                                    song_df=songs)
    analytics.run()

    # Reporting: textual statistics first, graphics last.
    analytics.print_song_statistical()
    analytics.print_user_statistical()
    analytics.make_graphics()
def _log_banner(logger, message, *args):
    """Log ``message`` framed by two 60-asterisk separator lines.

    ``args`` are forwarded to ``logger.info`` for lazy %-formatting.
    """
    separator = "*" * 60
    logger.info(separator)
    logger.info(message, *args)
    logger.info(separator)


def _process_with_relevant_users(song_set_df, preference_statistic, label):
    """Call ``one_metadata_process`` for ``song_set_df`` under ``label``.

    The user preferences are taken from
    ``preference_statistic.get_users_relevance_preferences_df`` with
    ``user_top_n_relevance=USER_SIZE``, fetched anew for every call.
    """
    one_metadata_process(
        song_set_df=song_set_df,
        users_preferences_df=preference_statistic.get_users_relevance_preferences_df(
            user_top_n_relevance=USER_SIZE),
        preference_statistic=preference_statistic,
        label=label)


def pre_load_data_set_and_song_variation_all_combination():
    """Run experiments 1-3 for every song-set size in ``SONG_SET_SIZE_LIST``.

    For each size a song subset is chosen with ``song_select`` and a dedicated
    ``PreferenceAnalytics`` is built for it. Then:

    * Experiment 1 processes each entry of ``METADATA_TO_PROCESS_LIST`` alone.
    * Experiment 2 processes each pair of album/title/artist as two separate
      columns (labels ``'|X|+|Y|'``).
    * Experiment 3 processes each pair concatenated into a single column
      (labels ``'X+Y'``).

    The per-size statistics are printed and plotted after each size, and
    ``make_evaluate_graphics`` runs once at the very end.
    """
    logger = logging.getLogger(__name__)
    song_set_df = get_song_set_df()
    preference_statistic = PreferenceAnalytics(
        users_preferences_df=get_users_preference_df(song_set_df),
        song_df=song_set_df)
    preference_statistic.run()

    # (log description, columns to keep, label) -- pairs as separate columns.
    separate_column_experiments = (
        ("album and title", ('id', 'album', 'title'), '|AL|+|TL|'),
        ("album and artist", ('id', 'album', 'artist'), '|AL|+|AR|'),
        ("title and artist", ('id', 'title', 'artist'), '|TL|+|AR|'),
    )
    # (log description, columns to concatenate, new column name and label).
    concatenated_column_experiments = (
        ("title and album", ('album', 'title'), 'AL+TL'),
        ("artist and album", ('album', 'artist'), 'AL+AR'),
        ("title and artist", ('title', 'artist'), 'TL+AR'),
    )
    # Experiment 4 (title, artist and album together; labels 'TL+AR+AL' and
    # '|TL|+|AR|+|AL|') was commented out in the original code and stays
    # disabled; re-enable it by adding matching rows to the tables above.

    for song_set_size in SONG_SET_SIZE_LIST:
        song_set_with_size_df = song_select(
            song_set_df, song_set_size,
            preference_statistic.get_song_relevance_df())
        preference_statistic_with_size = PreferenceAnalytics(
            users_preferences_df=get_users_preference_df(
                song_set_with_size_df),
            song_df=song_set_with_size_df)
        preference_statistic_with_size.run()

        # Experiment 1: one metadata column at a time.
        # NOTE(review): the label is the metadata name itself (the original
        # zipped METADATA_TO_PROCESS_LIST with itself), whereas
        # with_pre_load_data_set labels its runs from
        # METADATA_TO_PROCESS_LIST_PT -- confirm which labelling is wanted.
        for metadata in METADATA_TO_PROCESS_LIST:
            gc.collect()
            _log_banner(logger, "*\tEXPERIMENTO 1 - %s", metadata)
            _process_with_relevant_users(
                song_set_with_size_df.filter(['id', metadata], axis=1),
                preference_statistic_with_size,
                metadata)

        # Experiment 2: two metadata kept as separate columns.
        gc.collect()
        for description, columns, label in separate_column_experiments:
            _log_banner(logger, "*\tEXPERIMENTO 2 - %s - %s",
                        description, label)
            _process_with_relevant_users(
                # Fresh list per call, as in the original literal arguments.
                song_set_with_size_df.filter(list(columns), axis=1),
                preference_statistic_with_size,
                label)

        # Experiment 3: two metadata concatenated into one column.
        gc.collect()
        for description, columns, label in concatenated_column_experiments:
            _log_banner(logger, "*\tEXPERIMENTO 3 - %s - %s",
                        description, label)
            _process_with_relevant_users(
                concat_metadata_preserve_id(
                    df_list=song_set_with_size_df,
                    metadata_to_process_list=list(columns),
                    new_column=label),
                preference_statistic_with_size,
                label)

        preference_statistic_with_size.print_song_statistical()
        preference_statistic_with_size.print_user_statistical()
        preference_statistic_with_size.make_graphics()

    gc.collect()
    make_evaluate_graphics()