Exemple #1
0
    def get_feedback_top_user(self, criteria):
        """ Plot feedback value against the YU_VP_720 column for top users.

            Loads rows through ``UserFeedback.get_feedback_by_topuser``, keeps
            those whose application column equals ``config['COL']['YU_G']``,
            discards rows whose ``YU_VP_720`` value is blank, NaN or zero and
            hands the first 100 remaining values of both columns to
            ``Helper.plot_heatmap``.

            Parameters
            ----------
                criteria : forwarded unchanged to ``get_feedback_by_topuser``

            Returns
            ----------
                None. An ``IOError`` raised while processing is printed and
                not propagated.
        """
        cols = self.config['COL']
        date_col = cols['FB_DATE']
        val_col = cols['FB_VAL']
        yu_col = cols['YU_VP_720']

        user_feedback = UserFeedback(**self.config)
        df_userfb = user_feedback.get_feedback_by_topuser(criteria)

        try:
            # Work on an explicit copy: assigning a column into a filtered
            # slice of df_userfb triggers pandas' SettingWithCopyWarning and
            # the write is not guaranteed to land on the object we keep using.
            is_yu_general = df_userfb[cols['FB_APP']] == cols['YU_G']
            app_yu_g = df_userfb.loc[is_yu_general,
                                     [date_col, val_col, yu_col]].copy()

            # Blank strings and NaN are mapped to 0 so that one comparison
            # below catches every unusable value (real zeros included).
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)

            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            y_labels = ('YU_VDO_PLAYB_P720', val_col)
            Helper.plot_heatmap(app_yu_g[yu_col].head(100),
                                app_yu_g[val_col].head(100), y_labels)
        except IOError as error:
            print(error)
    def get_all_userfeedbacks(self):
        """
            Load every user feedback row and keep those with a usable RTT.

            Rows containing NaN are dropped first. The RTT column is then
            passed through ``__to_number`` and rows for which it yields -1
            are removed. The index is reset (the former index becomes a
            column), the columns are renamed through
            ``Helper.rename_df_columnname`` and the result is written to
            ``all_ufb_except_unknown.csv`` under ``config['OUT_DIR']``.

            Returns
            ----------
                The feedback DataFrame. When an ``IOError`` occurs it is
                printed and the frame is returned in the state it had
                reached at that point.
        """
        feedback_source = UserFeedback(**self.config)
        no_criteria = {}
        df_userfb = feedback_source.get_user_feedback(no_criteria).dropna()

        try:
            rtt_col = self.config['COL']['RTT']

            # -1 is the marker __to_number uses for values to be discarded
            df_userfb[rtt_col] = df_userfb[rtt_col].apply(self.__to_number)
            rejected = df_userfb[df_userfb[rtt_col] == -1].index
            df_userfb.drop(rejected, inplace=True)
            df_userfb.reset_index(inplace=True)

            # Align the column names with the ones the model expects
            df_userfb = Helper.rename_df_columnname(df_userfb,
                                                    self.config['COL'])

            # Dump kept for testing purposes
            df_userfb.to_csv(self.config['OUT_DIR'] +
                             'all_ufb_except_unknown.csv')
        except IOError as error:
            print(error)

        return df_userfb
Exemple #3
0
    def get_feedack_per_user(self, criteria):
        """ Count feedback rows per user, save the table and plot the counts.

            The feedback frame is grouped by the column named in
            ``config['GROUP_BY']['USER_ID']``; the per-group counts are
            written to ``USER_FEEDBACK_21NOV19.csv`` under
            ``config['OUT_DIR']`` and the ``FEEDBACK_APP`` counts are passed
            to ``Helper.plot_graphline`` against a 1-based running index.

            Parameters
            ----------
                criteria : forwarded unchanged to
                    ``UserFeedback.get_user_feedback`` (e.g. {'nrows': 10})

            Returns
            ----------
                None
        """
        user_feedback = UserFeedback(**self.config)
        df_userfb = user_feedback.get_user_feedback(criteria)

        user_col = self.config['GROUP_BY']['USER_ID']
        grouped_fbs_by_user = df_userfb.groupby([user_col
                                                 ]).count().reset_index()

        # One x position per user. The row count is taken from the frame
        # itself; looking up a hard-coded 'USER_ID' column raised KeyError
        # whenever the configured grouping column had another name.
        _index = np.arange(1, len(grouped_fbs_by_user) + 1)
        _counts = grouped_fbs_by_user['FEEDBACK_APP']

        filename = self.config['OUT_DIR'] + 'USER_FEEDBACK_21NOV19.csv'
        grouped_fbs_by_user.to_csv(filename)

        Helper.plot_graphline(_index, _counts, 0)
Exemple #4
0
    def __get_user_feedback(self, criteria=None):
        """ Load user feedback and plot how often each category occurs.

            The categories come from the first column of the loaded frame
            and are counted with ``Helper.count_ucategory``. Two histograms
            are drawn: the raw counts, then the counts divided by their sum.

            Parameters
            ----------
                criteria : forwarded unchanged to
                    ``UserFeedback.get_user_feedback``

            Returns
            ----------
                The feedback DataFrame as loaded.
        """
        feedback_source = UserFeedback(**self.config)
        df_userfb = feedback_source.get_user_feedback(criteria)

        first_column = df_userfb.iloc[:, 0]
        categories, occurrences = Helper.count_ucategory(first_column)

        # Absolute number of feedbacks per category
        Helper.plot_histogram(categories, occurrences)

        # Same data expressed as a share of all feedbacks
        shares = occurrences / np.sum(occurrences)
        Helper.plot_histogram(categories, shares)

        return df_userfb
    def get_feedbackmos_youtubeautomos(self):
        """
            Relate users' feedback MOS to the values of the YU_VP_720 column.

            For every feedback MOS value listed in ``config['CONST']['MOS']``
            the rows carrying that feedback value are grouped by the
            ``YU_VP_720`` column and the group sizes are converted into
            fractions of that feedback class, rounded to two decimals, e.g.

            feedback MOS 1 -> {cl-1: x1, cl-2: x2, cl-3: x3, cl-4: x4, cl-5: x5}

            Side effects: writes ``groupby_user_cat_3.csv`` (row count per
            feedback value) and ``fb_user_cat_2.csv`` (the filtered rows)
            under ``config['OUT_DIR']``.

            Returns
            ----------
                None. An ``IOError`` raised while processing is printed and
                not propagated.
        """
        cols = self.config['COL']
        val_col = cols['FB_VAL']
        yu_col = cols['YU_VP_720']

        user_feedback = UserFeedback(**self.config)
        df_userfb = user_feedback.get_user_feedback({})

        try:
            # Work on an explicit copy: assigning a column into a filtered
            # slice of df_userfb triggers pandas' SettingWithCopyWarning and
            # the write is not guaranteed to land on the object we keep using.
            is_yu_general = df_userfb[cols['FB_APP']] == cols['YU_G']
            app_yu_g = df_userfb.loc[is_yu_general, [val_col, yu_col]].copy()

            # Blank strings and NaN are mapped to 0 so that one comparison
            # below catches every unusable value (real zeros included).
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)
            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            # Number of usable rows per feedback value (saved for testing)
            per_feedback = app_yu_g.groupby([val_col]).count().reset_index()
            per_feedback['idx'] = per_feedback[val_col]
            per_feedback.to_csv(self.config['OUT_DIR'] +
                                'groupby_user_cat_3.csv')

            mos_list = self.config['CONST']['MOS']

            # One row per feedback MOS, one column per YU_VP_720 value
            df = pd.DataFrame()
            for mos_val in mos_list:
                rows = app_yu_g[app_yu_g[val_col] == mos_val]
                counts = rows.groupby([yu_col]).count().reset_index()

                # Turn the column of counts into a single row of fractions
                shares = counts[val_col].values.reshape(1, -1)
                shares = np.around(shares / np.sum(shares), decimals=2)
                row_df = pd.DataFrame(shares,
                                      index=[mos_val],
                                      columns=counts[yu_col])

                df = row_df if df.empty else pd.concat([df, row_df])

            # Filtered rows, saved for testing
            app_yu_g.to_csv(self.config['OUT_DIR'] + 'fb_user_cat_2.csv')

            # NOTE: the heatmap of `df` (Helper.plot_heatmap2) is disabled,
            # so the matrix built above is currently not consumed.
        except IOError as error:
            print(error)
    def count_userfb_bygroup2(self):
        """
            Count user feedback per YU_VP_720 value for each feedback MOS.

            For every feedback MOS value in ``config['CONST']['MOS']`` the
            matching rows are grouped by the ``YU_VP_720`` column and counted.
            Each group table gets a ``Val %`` column (share of the feedback
            class, rounded to two decimals) and a ``Feedback_MoS`` label
            ``'m_<value>'``. The tables are stacked, written to
            ``fb_user_cat_1.csv`` under ``config['OUT_DIR']`` and passed to
            ``Helper.plot3``.

            Returns
            ----------
                None. An ``IOError`` raised while processing is printed and
                not propagated.
        """
        cols = self.config['COL']
        val_col = cols['FB_VAL']
        yu_col = cols['YU_VP_720']

        user_feedback = UserFeedback(**self.config)
        df_userfb = user_feedback.get_user_feedback({})

        try:
            # Work on an explicit copy: assigning a column into a filtered
            # slice of df_userfb triggers pandas' SettingWithCopyWarning and
            # the write is not guaranteed to land on the object we keep using.
            is_yu_general = df_userfb[cols['FB_APP']] == cols['YU_G']
            app_yu_g = df_userfb[is_yu_general].copy()

            # Blank strings and NaN are mapped to 0 so that one comparison
            # below catches every unusable value (real zeros included).
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)
            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            mos_list = self.config['CONST']['MOS']

            df = pd.DataFrame()
            for mos_val in mos_list:
                rows = app_yu_g[app_yu_g[val_col] == mos_val]
                group_counts = rows.groupby([yu_col]).count().reset_index()

                occurrences = group_counts[val_col]
                group_counts['Val %'] = np.around(occurrences /
                                                  np.sum(occurrences),
                                                  decimals=2)
                # A scalar broadcasts to however many groups exist; the
                # former fixed 5-element list raised ValueError unless the
                # table had exactly five rows.
                group_counts['Feedback_MoS'] = 'm_' + str(mos_val)

                # Debug dump; rewritten on every pass, so only the table of
                # the last MOS value remains on disk.
                group_counts.to_csv(self.config['OUT_DIR'] +
                                    'fb_user_cat_2.csv')

                df = group_counts if df.empty else pd.concat(
                    [df, group_counts])

            # Stacked tables, saved for testing
            df.to_csv(self.config['OUT_DIR'] + 'fb_user_cat_1.csv')

            index = 'Feedback_MoS'
            columns = 'YOUTUBE___VIDEO_PLAYBACK___P_720___MOS'
            Helper.plot3(df, index, columns)
        except IOError as error:
            print(error)
    def __init__(self, **kwargs):
        """ Keep the keyword arguments as configuration and build the reader.

            Parameters
            ----------
                **kwargs : configuration entries; stored on ``self.config``
                    and forwarded unchanged to ``UserFeedback``
        """
        settings = kwargs
        self.config = settings

        # Reader used to load the user feedback data
        self.user_feedback = UserFeedback(**settings)
class UserFbViewModel:
    """
        UserFbViewModel
    """
    def __init__(self, **kwargs):
        """ Keep the keyword arguments as configuration and build the reader.

            Parameters
            ----------
                **kwargs : configuration entries; stored on ``self.config``
                    and forwarded unchanged to ``UserFeedback``
        """
        settings = kwargs
        self.config = settings

        # Reader used to load the user feedback data
        self.user_feedback = UserFeedback(**settings)

    def __to_nan(self, X):
        """ Check the value in X 
            if it match the errors, then replace it by NaN of Pandas.

            Parameters
            ----------
                X : value of any data type
            
            Returns
            ----------
                - NaN if it is an error value
                - X otherwise
        """
        # Error values to be identifier
        error_values = ['nan', '+Inf', '#NULL!', 'inf', 'Inf']
        try:
            if len(str(X)) == 0:
                return np.nan
            elif str(X) in error_values:
                return np.nan
            else:
                return X
        except ValueError:
            return np.nan

    def dataset_propre(self):
        """ This fuction create new dataset from 
        the original dataset
        """

        # Read all feedbacks from table user's feedback
        criteria = {}
        df_userfb = self.user_feedback.get_user_feedback(criteria)

        # Columns to map from original dataset to new dataset
        cols_name = ['FEEDBACK_APP', 'FEEDBACK_VALUE', 'USER_ID', 'RTT', \
                    'DOWNLOAD_LOSS_RATE', 'UPLOAD_LOSS_RATE', 'DOWNLOAD_JITTER', \
                    'UPLOAD_JITTER', 'UDP_DOWNLOAD_THROUGHPUT', 'UDP_UPLOAD_THROUGHPUT', \
                    'SKYPE___VOICE_CALL___MOS', 'YOUTUBE___VIDEO_PLAYBACK___P_720___MOS']

        # iterating the columns
        new_dataset = pd.DataFrame(columns=cols_name)
        for col in df_userfb.columns:
            new_dataset[col] = df_userfb.loc[:, col].apply(
                lambda x: self.__to_nan(x))

        # Drop NaN
        new_dataset = new_dataset.dropna()

        # Cols to remove
        del_cols = set(df_userfb.columns) - set(cols_name)
        new_dataset = new_dataset.drop(list(del_cols), axis=1)

        # Create empty DataFrame
        #new_dataset = pd.DataFrame(columns=cols_name)
        #for colname in cols_name:
        #new_dataset[colname] = df_userfb.loc[:, colname].apply(lambda x: self.__to_nan(x))

        # Save Dataframe to csv file
        filename = self.config['OUT_DIR'] + '2K_dataset.csv'
        # _groupby_df.transpose().to_csv(filename)
        new_dataset.to_csv(filename)

        return new_dataset

    def get_2k_userfeedback(self):
        # Read all feedbacks from table user's feedback
        criteria = {}
        df_2k_userfb = self.user_feedback.get_2k_userfeedback(criteria)

        return df_2k_userfb

    def count_userfb_bygroup(self):
        """
            Count user feedback by feedback MOS and by YU_VP_720 value.

            For every feedback MOS value listed in ``config['CONST']['MOS']``
            the rows carrying that feedback value are grouped by the
            ``YU_VP_720`` column and the group sizes are converted into
            fractions of that feedback class, rounded to two decimals.

            Side effects: writes ``groupby_user_cat_3.csv`` (row count per
            feedback value) and ``fb_user_cat_2.csv`` (the filtered rows)
            under ``config['OUT_DIR']``.

            Returns
            ----------
                None. An ``IOError`` raised while processing is printed and
                not propagated.
        """
        cols = self.config['COL']
        val_col = cols['FB_VAL']
        yu_col = cols['YU_VP_720']

        df_userfb = self.user_feedback.get_user_feedback({})

        try:
            # Work on an explicit copy: assigning a column into a filtered
            # slice of df_userfb triggers pandas' SettingWithCopyWarning and
            # the write is not guaranteed to land on the object we keep using.
            is_yu_general = df_userfb[cols['FB_APP']] == cols['YU_G']
            app_yu_g = df_userfb.loc[is_yu_general, [val_col, yu_col]].copy()

            # Blank strings and NaN are mapped to 0 so that one comparison
            # below catches every unusable value (real zeros included).
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)
            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            # Number of usable rows per feedback value (saved for testing)
            per_feedback = app_yu_g.groupby([val_col]).count().reset_index()
            per_feedback['idx'] = per_feedback[val_col]
            per_feedback.to_csv(self.config['OUT_DIR'] +
                                'groupby_user_cat_3.csv')

            mos_list = self.config['CONST']['MOS']

            # One row per feedback MOS, one column per YU_VP_720 value
            df = pd.DataFrame()
            for mos_val in mos_list:
                rows = app_yu_g[app_yu_g[val_col] == mos_val]
                counts = rows.groupby([yu_col]).count().reset_index()

                # Turn the column of counts into a single row of fractions
                shares = counts[val_col].values.reshape(1, -1)
                shares = np.around(shares / np.sum(shares), decimals=2)
                row_df = pd.DataFrame(shares,
                                      index=[mos_val],
                                      columns=counts[yu_col])

                df = row_df if df.empty else pd.concat([df, row_df])

            # Filtered rows, saved for testing
            app_yu_g.to_csv(self.config['OUT_DIR'] + 'fb_user_cat_2.csv')

            # NOTE: the heatmap of `df` (Helper.plot_heatmap2) is disabled,
            # so the matrix built above is currently not consumed.
        except IOError as error:
            print(error)

    def __to_number(self, X):
        """ Convert string to float
        """

        exclude_values = ['nan', '+Inf', '#NULL!', 'inf', 'Inf']

        try:
            if len(str(X)) == 0 or (str(X) in exclude_values):
                return -1
            else:
                return float(X)
        except ValueError:
            return 0

    def get_all_userfeedbacks(self):
        """
            Load every user feedback row and keep those with a usable RTT.

            Rows containing NaN are dropped first. The RTT column is then
            passed through ``__to_number`` and rows for which it yields -1
            are removed. The index is reset (the former index becomes a
            column), the columns are renamed through
            ``Helper.rename_df_columnname`` and the result is written to
            ``all_ufb_except_unknown.csv`` under ``config['OUT_DIR']``.

            Returns
            ----------
                The feedback DataFrame. When an ``IOError`` occurs it is
                printed and the frame is returned in the state it had
                reached at that point.
        """
        feedback_source = UserFeedback(**self.config)
        no_criteria = {}
        df_userfb = feedback_source.get_user_feedback(no_criteria).dropna()

        try:
            rtt_col = self.config['COL']['RTT']

            # -1 is the marker __to_number uses for values to be discarded
            df_userfb[rtt_col] = df_userfb[rtt_col].apply(self.__to_number)
            rejected = df_userfb[df_userfb[rtt_col] == -1].index
            df_userfb.drop(rejected, inplace=True)
            df_userfb.reset_index(inplace=True)

            # Align the column names with the ones the model expects
            df_userfb = Helper.rename_df_columnname(df_userfb,
                                                    self.config['COL'])

            # Dump kept for testing purposes
            df_userfb.to_csv(self.config['OUT_DIR'] +
                             'all_ufb_except_unknown.csv')
        except IOError as error:
            print(error)

        return df_userfb

    def normalize_string(self, datafarame, feature_names):
        """ Replace decimal separator , anglophone decimal separator .
        """
        for name in feature_names:
            datafarame[name] = datafarame[name].apply(
                lambda x: float(x.split()[0].replace(',', '.')))
        return datafarame

    def dataset_youtubeauto(self):
        """ Generate dataset for Youtube Auto decision tree model
        """

        # Features for decision tree model
        #feature_names = ['RTT','UTH','DTH','DL','UL','DJ','UJ']
        # Columns to map from original dataset to new dataset
        feature_names = ['RTT', 'DOWNLOAD_LOSS_RATE', 'UPLOAD_LOSS_RATE', 'DOWNLOAD_JITTER', \
                    'UPLOAD_JITTER', 'UDP_DOWNLOAD_THROUGHPUT', 'UDP_UPLOAD_THROUGHPUT']

        # Load users'feedbacks from CSV
        df_userfbs = self.get_2k_userfeedback()

        # Test set
        X_test = []

        # Get all column names
        colunms = list(df_userfbs.columns.values)
        if all(_fname in colunms for _fname in feature_names):
            try:
                for idx, row in df_userfbs.iterrows():
                    # Convert metric units from ACQUA to YoutubeAuto
                    # # Nanosec to Millisec
                    # rtt_auto = (row['RTT']) / 1000000
                    # # Microsec to Millisec
                    # dj_auto = (row['DJ']) / 1000000000
                    # uj_auto = (row['UJ']) / 1000000000
                    # # bps to Kbps
                    # dth_auto = (row['DTH']) / 1024
                    # uth_auto = (row['UTH']) / 1024
                    # # Nanosec to Millisec
                    rtt_auto = (row['RTT'])
                    # Microsec to Millisec
                    dj_auto = (row['DOWNLOAD_JITTER'])
                    uj_auto = (row['UPLOAD_JITTER'])
                    # bps to Kbps
                    dth_auto = (row['UDP_DOWNLOAD_THROUGHPUT'])
                    uth_auto = (row['UDP_UPLOAD_THROUGHPUT'])

                    # Loss
                    ul_auto = (row['UPLOAD_LOSS_RATE'])
                    dl_auto = (row['DOWNLOAD_LOSS_RATE'])

                    # Check list of keys in dict
                    # access data using column names, cast to float
                    # (bydefault its strings and change ',' to '. for all records')
                    feature ={'idx':idx, 'DTH':dth_auto,'RTT':rtt_auto , 'DJ':dj_auto, \
                    'DL': dl_auto, 'UJ':uj_auto, 'UL':ul_auto , 'UTH':uth_auto }

                    # Add record to list
                    X_test.append(feature)

            except EOFError as error:
                return error

        # Save for testing purpose
        filename = self.config[
            'OUT_DIR'] + 'DATASET_EST_QOE_BYYOUTUBEAUTOMODEL.csv'
        df_userfbs.to_csv(filename)

        return X_test, df_userfbs

    def predict_youtubeauto(self):
        """
            Estimate MOS values with the Youtube Auto decision tree.

            Loads the model description ``youtube.dash.json``, builds the
            feature records with ``dataset_youtubeauto`` and feeds both to
            ``DecisionTreeYoutubeAuto.predict``. The estimates are written
            to ``Estimated_MOS_YoutubeAuto_New.csv``; the features joined
            with the estimates on their row index (without the ``idx``
            columns) go to ``DATASET_MOSUSERFEEDBACK_QOE_YOUTUBEAUTO.csv``.
            Both files are created under ``config['OUT_DIR']``.

            Returns
            ----------
                Whatever ``DecisionTreeYoutubeAuto.predict`` returned.
        """
        tree = DecisionTreeYoutubeAuto(**self.config)

        # Model description (feature data) stored as JSON
        model_uri = FileHelper.dataset_path(self.config, "youtube.dash.json")
        json_model = FileHelper.load_model_json(model_uri)

        # Only the feature records are needed; the frame that comes with
        # them is rebuilt from the records right away.
        X_test, _ = self.dataset_youtubeauto()
        features_df = pd.DataFrame(X_test)

        estimated_mos = tree.predict(json_model, X_test)

        # Estimates on their own, saved for testing
        estimates_df = pd.DataFrame(estimated_mos)
        estimates_df.to_csv(self.config['OUT_DIR'] +
                            'Estimated_MOS_YoutubeAuto_New.csv')

        # Features and estimates side by side; both frames carry an 'idx'
        # column, which the merge suffixes with _x / _y.
        merged = features_df.merge(estimates_df,
                                   left_index=True,
                                   right_index=True)
        merged = merged.drop(['idx_x', 'idx_y'], axis='columns')
        merged.to_csv(self.config['OUT_DIR'] +
                      'DATASET_MOSUSERFEEDBACK_QOE_YOUTUBEAUTO.csv')

        tree.print(estimated_mos)
        return estimated_mos

    def count_userfb_bygroup2(self):
        """
            Count user feedback per feedback MoS value and per P720 MoS value.

            Loads the user feedback through UserFeedback.get_user_feedback
            (called with an empty criteria dict), keeps the rows whose
            config['COL']['FB_APP'] column equals config['COL']['YU_G'] and
            drops the rows whose config['COL']['YU_VP_720'] value is empty,
            NaN or 0.

            For every value in config['CONST']['MOS'] the remaining rows with
            that config['COL']['FB_VAL'] are grouped by the YU_VP_720 column
            and counted. Each group table gets two extra columns:
              * 'Val %'        - share of the group's FB_VAL count, rounded
                                 to 2 decimals
              * 'Feedback_MoS' - the label 'm_<mos value>'

            Side effects:
              * OUT_DIR + 'fb_user_cat_2.csv' is rewritten on every
                iteration, so it ends up holding the last group table only.
              * OUT_DIR + 'fb_user_cat_1.csv' receives all group tables
                concatenated.
              * The concatenated table is passed to Helper.plot3.

            Returns nothing. An IOError raised while processing is printed
            instead of being propagated.
        """

        user_feedback = UserFeedback(**self.config)
        criteria = {}
        df_userfb = user_feedback.get_user_feedback(criteria)

        try:
            col = self.config['COL']
            fb_val_col = col['FB_VAL']
            yu_col = col['YU_VP_720']

            # Keep only the rows of the configured feedback app. The explicit
            # copy makes the clean-up below operate on an independent frame
            # instead of a slice of df_userfb (avoids chained assignment).
            mask = df_userfb[col['FB_APP']] == col['YU_G']
            app_yu_g = df_userfb[mask].copy()

            # Map empty / NaN entries to 0 so a single "== 0" test below
            # catches every unusable record.
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)

            # Drop the unusable records by index label.
            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            mos_list = self.config['CONST']['MOS']

            df = pd.DataFrame()
            for mos_val in mos_list:
                # Rows whose user feedback equals the current MoS value.
                per_mos = app_yu_g[app_yu_g[fb_val_col] == mos_val]

                # One row per distinct YU_VP_720 value with column counts.
                grouped = per_mos.groupby([yu_col]).count().reset_index()

                counts = grouped[fb_val_col]
                grouped['Val %'] = np.around(counts / np.sum(counts),
                                             decimals=2)
                # A scalar is broadcast to however many groups exist. A
                # fixed-length list would raise ValueError whenever the
                # number of groups differs from that length.
                grouped['Feedback_MoS'] = 'm_' + str(mos_val)

                # Save for testing purpose (overwritten on each iteration).
                filename = self.config['OUT_DIR'] + 'fb_user_cat_2.csv'
                grouped.to_csv(filename)

                if df.empty:
                    df = pd.DataFrame(grouped)
                else:
                    df = pd.concat([df, grouped])

            # Save for testing purpose
            filename = self.config['OUT_DIR'] + 'fb_user_cat_1.csv'
            df.to_csv(filename)

            index = 'Feedback_MoS'
            columns = 'YOUTUBE___VIDEO_PLAYBACK___P_720___MOS'
            Helper.plot3(df, index, columns)

        except IOError as error:
            print(error)

    def get_feedbackmos_youtubeautomos(self):
        """
            Generate users' feedback MOS vs YoutubeAuto MOS.

            Given the users' feedback MOS of a specific class, associate it
            with the MOS classes predicted by Youtube Auto. For instance,
            given user feedback MOS class = 1, count the number of MOS
            classes predicted by the Youtube Auto model.

            Feedback MoS: {user_fb_mos: 1, estimated_yuauto_mos: {cl-1: x1, cl-2: x2, cl-3: x3, cl-4: x4, cl-5: x5}}
            Estimated MoS using Youtube Auto: [1..5]

            Steps performed:
              * Load the user feedback through
                UserFeedback.get_user_feedback (empty criteria dict) and keep
                the rows whose config['COL']['FB_APP'] column equals
                config['COL']['YU_G'], restricted to the FB_VAL and
                YU_VP_720 columns.
              * Drop the rows whose YU_VP_720 value is empty, NaN or 0.
              * Write the per-FB_VAL counts (plus an 'idx' column holding
                the FB_VAL value) to OUT_DIR + 'groupby_user_cat_3.csv'.
              * Build a table with one row per value of
                config['CONST']['MOS'] and one column per YU_VP_720 value,
                holding the share of feedback rounded to 2 decimals.
              * Write the cleaned rows to OUT_DIR + 'fb_user_cat_2.csv'.

            Returns nothing. An IOError raised while processing is printed
            instead of being propagated.
        """

        user_feedback = UserFeedback(**self.config)
        criteria = {}
        df_userfb = user_feedback.get_user_feedback(criteria)

        try:
            col = self.config['COL']
            fb_val_col = col['FB_VAL']
            yu_col = col['YU_VP_720']

            # Keep only the rows of the configured feedback app and the two
            # columns used below. The explicit copy makes the clean-up
            # operate on an independent frame instead of a slice of
            # df_userfb (avoids chained assignment).
            mask = df_userfb[col['FB_APP']] == col['YU_G']
            app_yu_g = df_userfb[mask][[fb_val_col, yu_col]].copy()

            # Map empty / NaN entries to 0 so a single "== 0" test below
            # catches every unusable record.
            app_yu_g[yu_col] = app_yu_g[yu_col].apply(
                lambda x: 0 if len(str(x)) == 0 or str(x) == 'nan' else x)

            # Drop the unusable records by index label.
            drop_ix = app_yu_g[app_yu_g[yu_col] == 0].index
            app_yu_g.drop(drop_ix, inplace=True)

            # Number of records per feedback value; saved for testing purpose.
            counts_by_fb = app_yu_g.groupby([fb_val_col]).count().reset_index()
            counts_by_fb['idx'] = counts_by_fb[fb_val_col]
            filename = self.config['OUT_DIR'] + 'groupby_user_cat_3.csv'
            counts_by_fb.to_csv(filename)

            mos_list = self.config['CONST']['MOS']

            df = pd.DataFrame()
            for mos_val in mos_list:
                # Rows whose user feedback equals the current MoS value.
                per_mos = app_yu_g[app_yu_g[fb_val_col] == mos_val]

                # One row per distinct YU_VP_720 value with its count.
                grouped = per_mos.groupby([yu_col]).count().reset_index()

                # Turn the column of counts into a single row of shares.
                shares = grouped[fb_val_col].values.reshape(1, -1)
                shares = np.around(shares / np.sum(shares), decimals=2)
                row_df = pd.DataFrame(shares,
                                      index=[mos_val],
                                      columns=grouped[yu_col])

                if df.empty:
                    df = pd.DataFrame(row_df)
                else:
                    df = pd.concat([df, row_df])

            # Save for testing purpose.
            # NOTE(review): this writes the cleaned input rows (app_yu_g),
            # not the share table `df` built above - confirm this is intended.
            filename = self.config['OUT_DIR'] + 'fb_user_cat_2.csv'
            app_yu_g.to_csv(filename)

            # The heatmap of `df` is currently disabled:
            # Helper.plot_heatmap2(df)

        except IOError as error:
            print(error)