예제 #1
0
    def _configure_data(self):
        """Load the data to be plotted for this run and store it on ``self.df``.

        Depending on ``dict_c['plot_mode']`` this loads either the 'mode'
        entry of the best-run summary ('error' mode) or the validation
        dataframe saved at the best epoch.
        """
        # NOTE: a large block of dead, commented-out exploratory code
        # (error-dataframe joins, AUC extraction, debug prints) was removed
        # here; see version control history if it is ever needed again.
        if (self.dict_c['plot_mode'] == 'error'):
            path = self.path + 'best/data_best.p'
            data = pickle_load(path, None)['mode']

        else:
            path = self.path + 'best/df_val.p'
            data = pickle_load(path, None)

        self.df = data
예제 #2
0
    def __init__(self, dict_c):
        """Load (or build) the cached image series and create the data splits.

        Args:
            dict_c: configuration dictionary for this data container.
        """
        self.dict_c = dict_c
        self.len_df = None
        self.count_t = None

        # Sentinel extrema (presumably tightened elsewhere — confirm usage).
        self.min_final = 1000.
        self.max_final = 0.

        # Cached series of dataframes/statistics; main_data_conf rebuilds it
        # on a cache miss.
        self.Series = pickle_load('./data/processed/ST/df_img.p',
                                  self.main_data_conf)
        self.mean_ = self.Series['mean']
        self.std = self.Series['std']

        # Split placeholders, populated by configure_shuffle() below.
        self.df_f_train = None
        self.df_f_val = None
        self.df_f_test = None

        self.df_t_train = None
        self.df_t_val = None
        self.df_t_test = None

        self.configure_shuffle()

        # Log split sizes: false-class then true-class (train, val, test).
        print(len(self.df_f_train), len(self.df_f_val), len(self.df_f_test))
        print(len(self.df_t_train), len(self.df_t_val), len(self.df_t_test))
예제 #3
0
    def configure_data(self):
        """Collect per-run training errors and validation AUC into a dataframe.

        Scans at most the first 10 run directories under each path in
        ``dict_c['path_a']`` and loads ``best/data_best.p`` from each run.
        Unreadable or incomplete runs are skipped (best-effort).

        Returns:
            pd.DataFrame with columns ['error_m', 'path', 'AUC_v'].
        """
        array = []
        for path in self.dict_c['path_a']:

            # NOTE: only the first 10 runs per directory are considered.
            list_names = os.listdir(path)[:10]

            for name in list_names:
                try:
                    path_best = path + name + '/best/data_best.p'
                    data = pickle_load(path_best, None)

                    # Concatenate false- and true-class training errors.
                    error_ = list(data['df_f_train']['error_m'])
                    error_.extend(list(data['df_t_train']['error_m']))
                    error_ = np.array(error_)
                    dict_ = {
                        'error_m': error_,
                        'path': path + name + '/',
                        'AUC_v': data['AUC_v']
                    }
                    array.append(dict_)
                except Exception as e:
                    # Best-effort: skip bad runs, but log why instead of
                    # silently swallowing the error.
                    print(e)
        df = pd.DataFrame(array, columns=['error_m', 'path', 'AUC_v'])
        return df
예제 #4
0
    def main_data_conf(self, *args):
        """Build (or load cached) raw dataframe and summary statistics.

        Returns:
            pd.Series with keys 'df_f', 'df_t' (label-split dataframes) and
            'max', 'min', 'mean', 'std' image statistics.
        """
        self.path = './data/raw/configured_raw/'
        self.list_names = os.listdir(self.path)

        GD = pp.get_df(self.path)
        path_o = './data/processed/df/df_1.p'

        # Load the cached dataframe; GD.get_df_data rebuilds it on a miss.
        df = pickle_load(path_o, GD.get_df_data, *self.list_names)
        df['images'] = list(map(self.add_frames_df, df['frames']))

        tmp = np.vstack(df['images'])

        df_f = df[df['label'] == False]
        df_t = df[df['label'] == True]
        dict_ = {
            'df_f': df_f,
            'df_t': df_t,
            'max': np.max(tmp),
            # BUG FIX: 'min' was previously computed with np.max(tmp),
            # making 'min' == 'max'. Use np.min for the true minimum.
            'min': np.min(tmp),
            'mean': np.mean(tmp),
            'std': np.std(tmp)
        }

        Series = pd.Series(dict_)

        return Series
예제 #5
0
    def main_data_conf(self, *args):
        """Derive movie data, drop invalid rows and split by label.

        Returns:
            pd.Series with keys 'df_f' and 'df_t'.
        """
        path_df, path_sc_p, path_sc_v, _ = self.return_path_pd(self.dict_c)

        # Load the cached dataframe; peak_derivation rebuilds it on a miss.
        self.df = pickle_load(path_df, self.peak_derivation, ()).iloc[:]
        self.len_df = len(self.df)

        self.df = apply_by_multiprocessing(self.df,
                                           self._configure_data_movie,
                                           axis=1,
                                           workers=12)

        # Rows whose configuration failed are marked with '' — drop them.
        valid = (self.df['data_X'] != '') & (self.df['data_y'] != '')
        self.df = self.df[valid]
        self.count_t = len(self.df)

        keep_cols = ['name', 'label', 'frames', 'data_X', 'data_y',
                     'segmentation', 'location', 'movieID']
        self.df = self.df[keep_cols]
        self._print_data()

        # Split into false-/true-labelled subsets.
        df_false = self.df[self.df['label'] == False]
        df_true = self.df[self.df['label'] == True]

        return pd.Series({'df_f': df_false, 'df_t': df_true})
예제 #6
0
    def __init__(self, dict_c):
        """Restore the persisted run configuration and prepare plot data.

        Args:
            dict_c: dict with 'path', 'path_dict' and the two mode flags.
        """
        self.path = dict_c['path']

        # The persisted config on disk is the source of truth; only the
        # mode flags are taken from the caller-supplied dict.
        self.dict_c = pickle_load(dict_c['path_dict'], None)
        for key in ('mode', 'plot_mode'):
            self.dict_c[key] = dict_c[key]

        self.AUC_max = None
        self.x = None
        self.df = None

        self._configure_data()
예제 #7
0
    def load_data(self, path):
        """Load best-run data, CMA_ES-equivalent paths and configs per run.

        Args:
            path: directory containing one subdirectory per run.

        Returns:
            Tuple (array_data, array_path, array_dict); runs that fail to
            load are skipped with the error printed.
        """
        array_data = []
        array_path = []
        array_dict = []

        for name in os.listdir(path):
            run_dir = path + name
            try:
                data = pickle_load(run_dir + '/best/data_best.p', None)
                dict_c = pickle_load(run_dir + '/dict.p', None)
                array_data.append(data)
                # Report the equivalent CMA_ES run directory, not bayes_opt.
                array_path.append(run_dir.replace('bayes_opt', 'CMA_ES'))
                array_dict.append(dict_c)

            except Exception as e:
                print(e)
        return array_data, array_path, array_dict
예제 #8
0
    def __init__(self, dict_c=None, path=None):
        """Initialise from a config dict (new run) or an existing run path.

        Args:
            dict_c: configuration dict; when given, the run directory is set
                up and the dict is persisted alongside it.
            path: path of an existing run; its persisted dict is loaded.
        """
        # Use identity checks: `!= None` misbehaves for objects that
        # override __eq__ (e.g. pandas objects).
        if dict_c is not None:
            FS_manager.__init__(self, dict_c['path_save'])
            pickle_save(self.return_path_dict(), dict_c)
            self.dict_c = dict_c

        elif path is not None:
            FS_manager.__init__(self, path)
            self.dict_c = pickle_load(self.return_path_dict(), None)
        else:
            # NOTE(review): consider raising ValueError here instead of
            # printing — the instance is left unusable either way.
            print('WRONG CONF, GIVE PATH OR DICT')
예제 #9
0
    def peak_derivation(self, *args):
        """Run the preprocessing pipeline: raw df -> paths -> scaling -> PCA.

        Returns:
            The fully preprocessed dataframe.
        """
        path_pd, path_sc_p, path_sc_v, path = self.return_path_pd(self.dict_c)

        #### get dataframe ############
        GD = pp.get_df(self.path)
        path_o = './data/processed/df/df_1.p'
        df = pickle_load(path_o, GD.get_df_data, *self.list_names)

        # The picture-moving (pp.Move_p) and background-subtraction (pp.BGS)
        # stages are currently disabled; their dead code was removed. The
        # log lines are kept so pipeline output stays comparable.
        print('MOVING PICTURES')
        print('BGS')

        # peak derivation
        print('Peak derivation')
        PP = pp.path_Generation(df, self.dict_c)
        df = PP.main()

        ## Train scaler ####
        print('Scaler')
        scaler = pp.scaler()
        # Scalers are cached; _train_scaler fits them on a cache miss.
        self.scaler_p = pickle_load(path_sc_p, scaler._train_scaler, df,
                                    'data_p')
        self.scaler_v = pickle_load(path_sc_v, scaler._train_scaler, df,
                                    'data_v')
        df = scaler.main(df, self.scaler_p, self.scaler_v)

        print('PCA')
        ## PCA          ####
        PCA_mod = pp.PCA_(self.dict_c)
        df = PCA_mod.main(df, path)

        return df
예제 #10
0
    def main_CMA_ES(self, *args):
        """Optimise ensemble weights with CMA-ES and persist the AUC history.

        Args:
            args[0]: dict holding the six train/val/test dataframes.
            args[1]: multiprocessing Queue that receives the best val AUC.
        """
        data = args[0]
        Queue = args[1]

        self.df_f_train = data['df_f_train']
        self.df_f_val = data['df_f_val']
        self.df_f_test = data['df_f_test']

        self.df_t_train = data['df_t_train']
        self.df_t_val = data['df_t_val']
        self.df_t_test = data['df_t_test']

        # One weight per error column of the ensemble error matrix.
        dimension = self.df_f_train['error_e'].iloc[0].shape[1]

        es = cma.fmin(
            self._opt_function, dimension * [1], self.sigma, {
                'bounds': self.bounds,
                'maxfevals': self.evals,
                'verb_disp': self.verbose,
                'verb_log': self.verbose_log
            })

        # (removed an unused tuple of the three AUC maxima that was built
        # here and never read)
        Queue.put(self.AUC_v_max)

        print('Solution CMA')
        print(es[0])
        print(self.AUC_tr_max, self.AUC_v_max, self.AUC_t_maxv)
        print()

        df = pd.DataFrame([{
            'AUC': self.AUC_tr_max,
            'AUC_v': self.AUC_v_max,
            'AUC_t': self.AUC_t_maxv
        }])[['AUC', 'AUC_v', 'AUC_t']]
        path = self.dict_c['path_save'] + 'hist.p'
        df_saved = pickle_load(path, None)
        # DataFrame.append was removed in pandas 2.x; concat is equivalent.
        df_saved = pd.concat([df_saved, df], ignore_index=False)
        pickle_save_(path, df_saved)
        self.plot(df_saved)
예제 #11
0
    def load_data(self, df):
        """Load every model's best data and stack per-sample 'error_v' matrices.

        For each of the six train/val/test keys, the 'error_v' values of all
        models are vertically stacked per data point and reshaped to
        (-1, dict_c['clusters']), producing one 'error_e' matrix per sample.

        Args:
            df: dataframe with a 'path' column, one row per model run.

        Returns:
            dict mapping each split key to a DataFrame with an 'error_e'
            column (the other listed columns are absent from the row dicts
            and therefore filled with NaN by pandas).
        """

        dict_ = {}

        array_data = []
        keys = [
            'df_t_train', 'df_t_val', 'df_t_test', 'df_f_train', 'df_f_val',
            'df_f_test'
        ]
        # Load best/data_best.p for every model run listed in df.
        for i in range(len(df)):
            path_best = df['path'].iloc[i] + 'best/data_best.p'
            data = pickle_load(path_best, None)
            array_data.append(data)

        for key in keys:
            array_dicts = []
            # Iterate data points of the first model; assumes every model
            # has the same number of samples per split — TODO confirm.
            for data_point in range(len(array_data[0][key])):
                dict_DP = {}
                for model_id in range(len(array_data)):
                    if (model_id == 0):
                        # First model seeds the accumulator.
                        data = array_data[model_id][key]['error_v'].iloc[
                            data_point]
                    else:
                        # Subsequent models are stacked row-wise beneath it.
                        data = np.vstack((data, array_data[model_id][key]
                                          ['error_v'].iloc[data_point]))

                # Flatten the stack into rows of 'clusters' columns.
                data = data.reshape(-1, self.dict_c['clusters'])

                dict_DP['error_e'] = data

                array_dicts.append(dict_DP)
            dict_[key] = pd.DataFrame(array_dicts,
                                      columns=[
                                          'error_e', 'location',
                                          'segmentation', 'frames', 'label'
                                      ])

        return dict_
예제 #12
0
    def _get_data_no_cma(self, dict_data):
        """Derive error columns, score AUCs, and persist history + best run.

        Args:
            dict_data: dict holding the six train/val/test dataframes plus
                bookkeeping entries ('epoch', 'path_o', 'loss_f_tr',
                'loss_f_v').

        Returns:
            A new dict_data enriched with AUC/ROC metrics, losses and the
            (possibly column-trimmed) dataframes.
        """
        # (removed unused locals `i` and `dimension` from the original)
        df_t_train = dict_data['df_t_train']
        df_t_val = dict_data['df_t_val']
        df_t_test = dict_data['df_t_test']

        df_f_train = dict_data['df_f_train']
        df_f_val = dict_data['df_f_val']
        df_f_test = dict_data['df_f_test']

        loss_f_tr = dict_data['loss_f_tr']
        loss_f_v = dict_data['loss_f_v']

        self.AUC_max = 0
        self.AUC_min = 100

        def _add_error_columns(frame):
            # Derive 'error_e' from (data_y, data_y_p) pairs, then the
            # visualisation ('error_v') and scalar ('error_m') errors.
            pairs = zip(list(frame['data_y']), list(frame['data_y_p']))
            frame['error_e'] = list(map(self._get_error_cma, pairs))
            frame['error_v'] = list(map(self.get_error_vis, frame['error_e']))
            frame['error_m'] = list(map(self._get_error_m, frame['error_v']))

        # Same derivation for all six splits (was six copy-pasted blocks).
        for frame in (df_t_train, df_t_val, df_t_test,
                      df_f_train, df_f_val, df_f_test):
            _add_error_columns(frame)

        df_t_train_val = pd.concat([df_t_train, df_t_val])

        AUC, FPR, TPR = self.get_AUC_score(df_t_train_val['error_m'],
                                           df_f_train['error_m'])
        AUC_v, FPR_v, TPR_v = self.get_AUC_score(df_t_train_val['error_m'],
                                                 df_f_val['error_m'])
        AUC_t, FPR_t, TPR_t = self.get_AUC_score(df_t_test['error_m'],
                                                 df_f_test['error_m'])

        loss_t_tr, loss_t_v, loss_t_t, loss_f_t, loss_t_v_tr = self._calc_loss(
            df_t_train, df_f_train, df_t_val, df_f_val, df_t_test, df_f_test)
        dict_data = {
            'AUC': AUC,
            'FPR': FPR,
            'TPR': TPR,
            'AUC_v': AUC_v,
            'TPR_v': TPR_v,
            'FPR_v': FPR_v,
            'AUC_t': AUC_t,
            'TPR_t': TPR_t,
            'FPR_t': FPR_t,
            'train_f': loss_f_tr,
            'val_f': loss_f_v,
            'test_f': loss_f_t,
            'train_t': loss_t_tr,
            'val_t': loss_t_v,
            'test_t': loss_t_t,
            'train_val_t': loss_t_v_tr,
            'path_o': dict_data['path_o'],
            'epoch': dict_data['epoch'],
            'df_t_train': df_t_train,
            'df_t_val': df_t_val,
            'df_t_test': df_t_test,
            'df_t_val_train': df_t_train_val,
            'df_f_train': df_f_train,
            'df_f_val': df_f_val,
            'df_f_test': df_f_test,
        }

        path = dict_data['path_o'] + 'hist.p'
        df = pd.DataFrame([dict_data])[[
            'AUC', 'AUC_v', 'AUC_t', 'train_f', 'train_t', 'val_f', 'val_t',
            'test_t', 'test_f', 'train_val_t'
        ]]

        if ('hist.p' not in os.listdir(dict_data['path_o'])):
            pickle_save(path, df)
            df_saved = df
        else:
            df_saved = pickle_load(path, None)
            # DataFrame.append was removed in pandas 2.x; concat is
            # equivalent here.
            df_saved = pd.concat([df_saved, df], ignore_index=False)
            pickle_save(path, df_saved)

        path_b = dict_data['path_o'] + 'best/'

        if (os.path.exists(path_b) == False):
            os.mkdir(path_b)

        # New best validation AUC: persist the full validation frame, then
        # a column-trimmed snapshot of every split. (The original checked
        # the same condition twice in a row; merged into one block.)
        if (dict_data['AUC_v'] >= max(list(df_saved['AUC_v']))):
            pickle_save_(path_b + 'df_val.p', dict_data['df_t_val'])

            keep = [
                'error_e', 'error_m', 'error_v', 'location', 'segmentation',
                'frames', 'label'
            ]
            for key in ('df_t_train', 'df_t_val', 'df_t_test',
                        'df_t_val_train', 'df_f_train', 'df_f_val',
                        'df_f_test'):
                dict_data[key] = dict_data[key][keep]

            pickle_save_(path_b + 'data_best.p', dict_data)

        return dict_data
예제 #13
0
    def get_data_dict(self, dict_, epoch):
        """Score the six splits, persist AUC history and the best snapshot.

        Args:
            dict_: dict holding the six train/val/test dataframes (each with
                an 'error_m' column).
            epoch: current epoch number; a plot is emitted every 10 epochs.

        Returns:
            The validation AUC for this epoch.
        """
        df_t_train = dict_['df_t_train']
        df_t_val = dict_['df_t_val']
        df_t_test = dict_['df_t_test']

        df_f_train = dict_['df_f_train']
        df_f_val = dict_['df_f_val']
        df_f_test = dict_['df_f_test']

        AUC, FPR, TPR = self.get_AUC_score(df_t_train['error_m'],
                                           df_f_train['error_m'])
        AUC_v, FPR_v, TPR_v = self.get_AUC_score(df_t_val['error_m'],
                                                 df_f_val['error_m'])
        AUC_t, FPR_t, TPR_t = self.get_AUC_score(df_t_test['error_m'],
                                                 df_f_test['error_m'])

        dict_data = {
            'AUC': AUC,
            'FPR': FPR,
            'TPR': TPR,
            'AUC_v': AUC_v,
            'TPR_v': TPR_v,
            'FPR_v': FPR_v,
            'AUC_t': AUC_t,
            'TPR_t': TPR_t,
            'FPR_t': FPR_t,
            'df_t_train': df_t_train,
            'df_t_val': df_t_val,
            'df_t_test': df_t_test,
            'df_f_train': df_f_train,
            'df_f_val': df_f_val,
            'df_f_test': df_f_test,
            'epoch': epoch
        }

        path = self.dict_c['path_save'] + 'hist.p'
        df = pd.DataFrame([dict_data])[['AUC', 'AUC_v', 'AUC_t']]

        if ('hist.p' not in os.listdir(self.dict_c['path_save'])):
            pickle_save_(path, df)
            df_saved = df
        else:
            df_saved = pickle_load(path, None)
            # DataFrame.append was removed in pandas 2.x; concat is
            # equivalent here.
            df_saved = pd.concat([df_saved, df], ignore_index=False)
            pickle_save_(path, df_saved)
        if (epoch % 10 == 0):
            self.plot(df_saved)

        path_b = self.dict_c['path_save'] + 'best/'

        if (os.path.exists(path_b) == False):
            os.mkdir(path_b)

        # New best validation AUC: save a column-trimmed snapshot of every
        # split (was six copy-pasted subsetting blocks).
        if (dict_data['AUC_v'] >= max(list(df_saved['AUC_v']))):
            keep = [
                'error_e', 'error_m', 'error_v', 'location', 'segmentation',
                'frames', 'label'
            ]
            for key in ('df_t_train', 'df_t_val', 'df_t_test',
                        'df_f_train', 'df_f_val', 'df_f_test'):
                dict_data[key] = dict_data[key][keep]

            pickle_save_(path_b + 'data_best.p', dict_data)

        return AUC_v
예제 #14
0
    def save_data(self):
        """Re-score val/test with the best weights and persist the best model.

        Uses the best validation weight vector ``self.best_x_v`` to recompute
        error columns on all six splits and, if the new validation AUC beats
        the saved history, dumps everything to ``best/data_best.p``.
        """

        # Training metrics come from the stored maxima; val/test are
        # re-evaluated with the best validation weights.
        AUC, FPR, TPR = self.AUC_tr_max, self.FPR_tr, self.TPR_tr
        AUC_v, FPR_v, TPR_v = self._opt_function_(self.best_x_v,
                                                  self.df_f_val['error_e'],
                                                  self.df_t_val['error_e'])
        AUC_t, FPR_t, TPR_t = self._opt_function_(self.best_x_v,
                                                  self.df_f_test['error_e'],
                                                  self.df_t_test['error_e'])

        path = self.dict_c['path_save'] + 'hist.p'
        df_saved = pickle_load(path, None)
        # Only persist when this run strictly beats the best recorded val AUC.
        if (AUC_v > max(df_saved['AUC_v'])):
            # 'error_v' columns: ensemble error per sample under best_x_v.
            self.df_f_train['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_f_train['error_e'])))
            self.df_f_val['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_f_val['error_e'])))
            self.df_f_test['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_f_test['error_e'])))

            self.df_t_train['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_t_train['error_e'])))
            self.df_t_val['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_t_val['error_e'])))
            self.df_t_test['error_v'] = list(
                map(
                    functools.partial(self._get_error_ensemble,
                                      x=self.best_x_v),
                    np.array(self.df_t_test['error_e'])))

            # 'error_m' columns: scalar (max) error per sample under best_x_v.
            self.df_f_train['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_f_train['error_e'])))
            self.df_f_val['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_f_val['error_e'])))
            self.df_f_test['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_f_test['error_e'])))

            self.df_t_train['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_t_train['error_e'])))
            self.df_t_val['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_t_val['error_e'])))
            self.df_t_test['error_m'] = list(
                map(functools.partial(self._get_error_max, x=self.best_x_v),
                    np.array(self.df_t_test['error_e'])))

            # Snapshot of weights, metrics and all six splits.
            dict_ = {
                'x_v': self.best_x_v,
                'x_tr': self.best_x_tr,
                'AUC_tr': self.AUC_tr_max,
                'AUC_v': AUC_v,
                'AUC_t': AUC_t,
                'FPR': FPR,
                'TPR': TPR,
                'FPR_v': FPR_v,
                'TPR_v': TPR_v,
                'FPR_t': FPR_t,
                'TPR_t': TPR_t,
                'df_f_train': self.df_f_train,
                'df_f_val': self.df_f_val,
                'df_f_test': self.df_f_test,
                'df_t_train': self.df_t_train,
                'df_t_val': self.df_t_val,
                'df_t_test': self.df_t_test,
            }

            self._configure_dir(self.dict_c['path_save'] + 'best')
            pickle_save(self.dict_c['path_save'] + 'best/data_best.p', dict_)