def _configure_data(self):
    """Load the dataframe to plot from the experiment's 'best' artifacts.

    Depending on ``dict_c['plot_mode']`` this reads either the best-epoch
    dictionary (key ``'mode'``) or the saved validation dataframe, and
    stores the result in ``self.df``.

    Cleanup: removed a large block of commented-out dead code and the
    redundant ``data = data`` self-assignment.
    """
    if self.dict_c['plot_mode'] == 'error':
        path = self.path + 'best/data_best.p'
        # NOTE(review): data_best.p is written elsewhere as a dict of
        # dataframes/metrics; the 'mode' key is assumed present for this
        # plot mode -- confirm against the writer of data_best.p.
        data = pickle_load(path, None)['mode']
    else:
        path = self.path + 'best/df_val.p'
        data = pickle_load(path, None)

    self.df = data
def __init__(self, dict_c):
    """Cache the config, load the preprocessed image Series and build splits."""
    self.dict_c = dict_c

    # Bookkeeping counters / running extremes, filled in later.
    self.len_df = None
    self.count_t = None
    self.min_final = 1000.
    self.max_final = 0.

    # Load (or lazily build via main_data_conf) the image-statistics Series.
    self.Series = pickle_load('./data/processed/ST/df_img.p',
                              self.main_data_conf)
    self.mean_ = self.Series['mean']
    self.std = self.Series['std']

    # Placeholders for the six splits, populated by configure_shuffle().
    for split_attr in ('df_f_val', 'df_f_test', 'df_f_train',
                       'df_t_train', 'df_t_val', 'df_t_test'):
        setattr(self, split_attr, None)

    self.configure_shuffle()

    # Report split sizes: false-label row first, true-label row second.
    for train, val, test in ((self.df_f_train, self.df_f_val, self.df_f_test),
                             (self.df_t_train, self.df_t_val, self.df_t_test)):
        print(len(train), len(val), len(test))
def configure_data(self, limit=10):
    """Collect per-model training errors and validation AUC into a DataFrame.

    Scans the experiment directories under ``dict_c['path_a']`` (at most
    ``limit`` entries per path; default 10 preserves the previous
    hard-coded slice), loads each model's ``best/data_best.p`` and records
    its concatenated train errors, path, and validation AUC.

    Args:
        limit: maximum number of model directories read per path.

    Returns:
        pd.DataFrame with columns ['error_m', 'path', 'AUC_v'].
    """
    rows = []
    for path in self.dict_c['path_a']:
        for name in os.listdir(path)[:limit]:
            try:
                path_best = path + name + '/best/data_best.p'
                data = pickle_load(path_best, None)
                error_ = list(data['df_f_train']['error_m'])
                error_.extend(list(data['df_t_train']['error_m']))
                rows.append({
                    'error_m': np.array(error_),
                    'path': path + name + '/',
                    'AUC_v': data['AUC_v'],
                })
            except Exception as e:
                # Best-effort: skip unreadable/incomplete runs, but log the
                # failure instead of silently swallowing it (consistent with
                # load_data's error handling).
                print(e)

    return pd.DataFrame(rows, columns=['error_m', 'path', 'AUC_v'])
def main_data_conf(self, *args):
    """Build the raw-data Series: frames split by label plus global image
    statistics (max/min/mean/std over all stacked images).

    Returns:
        pd.Series with keys 'df_f', 'df_t', 'max', 'min', 'mean', 'std'.
    """
    self.path = './data/raw/configured_raw/'
    self.list_names = os.listdir(self.path)
    GD = pp.get_df(self.path)

    # Load the dataframe from cache, or build it via GD.get_df_data.
    path_o = './data/processed/df/df_1.p'
    df = pickle_load(path_o, GD.get_df_data, *self.list_names)
    df['images'] = list(map(self.add_frames_df, df['frames']))

    tmp = np.vstack(df['images'])
    df_f = df[df['label'] == False]
    df_t = df[df['label'] == True]

    dict_ = {
        'df_f': df_f,
        'df_t': df_t,
        'max': np.max(tmp),
        # BUG FIX: was np.max(tmp) -- 'min' must be the minimum pixel value.
        'min': np.min(tmp),
        'mean': np.mean(tmp),
        'std': np.std(tmp)
    }
    Series = pd.Series(dict_)
    return Series
def main_data_conf(self, *args):
    """Derive the peak dataframe, preprocess each movie in parallel, and
    return the label-split result as a Series with keys 'df_f'/'df_t'."""
    path_df, path_sc_p, path_sc_v, _ = self.return_path_pd(self.dict_c)

    # Load from cache (or compute via peak_derivation); .iloc[:] preserves
    # the original slicing behaviour.
    self.df = pickle_load(path_df, self.peak_derivation, ()).iloc[:]
    self.len_df = len(self.df)

    # Per-movie configuration, fanned out over 12 worker processes.
    self.df = apply_by_multiprocessing(self.df,
                                       self._configure_data_movie,
                                       axis=1,
                                       workers=12)

    # Drop movies whose preprocessing produced empty X or y payloads.
    for payload_col in ('data_X', 'data_y'):
        self.df = self.df[self.df[payload_col] != '']
    self.count_t = len(self.df)

    keep_cols = ['name', 'label', 'frames', 'data_X', 'data_y',
                 'segmentation', 'location', 'movieID']
    self.df = self.df[keep_cols]
    self._print_data()

    return pd.Series({
        'df_f': self.df[self.df['label'] == False],
        'df_t': self.df[self.df['label'] == True],
    })
def __init__(self, dict_c):
    """Restore the saved experiment config and prepare plotting state."""
    self.path = dict_c['path']

    # The persisted config is the source of truth; only mode/plot_mode are
    # overridden from the caller-supplied dict.
    self.dict_c = pickle_load(dict_c['path_dict'], None)
    for key in ('mode', 'plot_mode'):
        self.dict_c[key] = dict_c[key]

    # Plotting state, populated by _configure_data().
    self.AUC_max = None
    self.x = None
    self.df = None

    self._configure_data()
def load_data(self, path):
    """Load each model's best-epoch data, its CMA_ES twin path, and config.

    Returns:
        (array_data, array_path, array_dict) -- parallel lists; model
        directories that fail to load are skipped after printing the error.
    """
    array_data = []
    array_path = []
    array_dict = []

    for name in os.listdir(path):
        base = path + name
        try:
            data = pickle_load(base + '/best/data_best.p', None)
            dict_c = pickle_load(base + '/dict.p', None)
        except Exception as e:
            print(e)
            continue
        array_data.append(data)
        # Mirror directory in the CMA_ES experiment tree.
        array_path.append(base.replace('bayes_opt', 'CMA_ES'))
        array_dict.append(dict_c)

    return array_data, array_path, array_dict
def __init__(self, dict_c=None, path=None):
    """Initialise from a fresh config dict (saved to disk) or from an
    existing experiment path (config loaded from disk).

    Args:
        dict_c: configuration dict with a 'path_save' entry; takes
            precedence over ``path`` when both are given.
        path: existing experiment directory containing a saved config.

    Raises:
        ValueError: if neither argument is provided.
    """
    if dict_c is not None:  # was `!= None`; identity test is the idiom
        FS_manager.__init__(self, dict_c['path_save'])
        pickle_save(self.return_path_dict(), dict_c)
        self.dict_c = dict_c
    elif path is not None:
        FS_manager.__init__(self, path)
        self.dict_c = pickle_load(self.return_path_dict(), None)
    else:
        # Previously only printed a warning and left the object half
        # constructed (no self.dict_c) -- fail fast instead.
        raise ValueError('WRONG CONF, GIVE PATH OR DICT')
def peak_derivation(self, *args): path_pd, path_sc_p, path_sc_v, path = self.return_path_pd(self.dict_c) #### get dataframe ############ GD = pp.get_df(self.path) path_o = './data/processed/df/df_1.p' df = pickle_load(path_o, GD.get_df_data, *self.list_names) ### movie pictures to interim stage print('MOVING PICTURES') # MV = pp.Move_p() # df = MV.move_p ictures(df) # apply background subtraction print('BGS') # bgs = pp.BGS(self.dict_c) # bgs.main(df) # peak derivation print('Peak derivation') PP = pp.path_Generation(df, self.dict_c) df = PP.main() ## Train scaler #### print('Scaler') scaler = pp.scaler() self.scaler_p = pickle_load(path_sc_p, scaler._train_scaler, df, 'data_p') self.scaler_v = pickle_load(path_sc_v, scaler._train_scaler, df, 'data_v') df = scaler.main(df, self.scaler_p, self.scaler_v) print('PCA') ## PCA #### PCA_mod = pp.PCA_(self.dict_c) df = PCA_mod.main(df, path) return df
def main_CMA_ES(self, *args):
    """Run CMA-ES over the ensemble weights for one data split and append
    the resulting AUCs to the experiment history.

    Args (positional, via *args):
        args[0]: dict holding the six train/val/test dataframes.
        args[1]: multiprocessing Queue; receives the best validation AUC.

    Cleanup: removed the unused ``data = (AUC_tr_max, AUC_v_max,
    AUC_t_maxv)`` tuple that was built but never read.
    """
    data = args[0]
    Queue = args[1]

    self.df_f_train = data['df_f_train']
    self.df_f_val = data['df_f_val']
    self.df_f_test = data['df_f_test']
    self.df_t_train = data['df_t_train']
    self.df_t_val = data['df_t_val']
    self.df_t_test = data['df_t_test']

    # One CMA-ES weight per error column.
    dimension = self.df_f_train['error_e'].iloc[0].shape[1]

    es = cma.fmin(
        self._opt_function, dimension * [1], self.sigma, {
            'bounds': self.bounds,
            'maxfevals': self.evals,
            'verb_disp': self.verbose,
            'verb_log': self.verbose_log
        })

    # Report the best validation AUC back to the parent process.
    Queue.put(self.AUC_v_max)

    print('Solution CMA')
    print(es[0])
    print(self.AUC_tr_max, self.AUC_v_max, self.AUC_t_maxv)
    print()

    df = pd.DataFrame([{
        'AUC': self.AUC_tr_max,
        'AUC_v': self.AUC_v_max,
        'AUC_t': self.AUC_t_maxv
    }])[['AUC', 'AUC_v', 'AUC_t']]

    path = self.dict_c['path_save'] + 'hist.p'
    df_saved = pickle_load(path, None)
    df_saved = df_saved.append(df, ignore_index=False)
    pickle_save_(path, df_saved)
    self.plot(df_saved)
def load_data(self, df):
    """Stack each model's per-datapoint 'error_v' matrices into ensemble
    'error_e' frames, one DataFrame per train/val/test split.

    Args:
        df: DataFrame with a 'path' column pointing at experiment dirs
            containing 'best/data_best.p'.

    Returns:
        dict mapping each split key to a DataFrame whose 'error_e' column
        holds the vertically stacked error matrix, reshaped to
        (-1, dict_c['clusters']). The other declared columns ('location',
        'segmentation', 'frames', 'label') are left empty here --
        presumably filled downstream; TODO confirm.
    """
    dict_ = {}
    array_data = []
    keys = [
        'df_t_train', 'df_t_val', 'df_t_test', 'df_f_train', 'df_f_val',
        'df_f_test'
    ]

    # Load every model's best-epoch data once, up front.
    for i in range(len(df)):
        path_best = df['path'].iloc[i] + 'best/data_best.p'
        data = pickle_load(path_best, None)
        array_data.append(data)

    for key in keys:
        array_dicts = []
        # NOTE(review): assumes every model has the same number of
        # datapoints per split as model 0 -- confirm.
        for data_point in range(len(array_data[0][key])):
            dict_DP = {}
            # Accumulate this datapoint's error matrix across all models.
            for model_id in range(len(array_data)):
                if (model_id == 0):
                    data = array_data[model_id][key]['error_v'].iloc[
                        data_point]
                else:
                    data = np.vstack((data, array_data[model_id][key]
                                      ['error_v'].iloc[data_point]))
            data = data.reshape(-1, self.dict_c['clusters'])
            dict_DP['error_e'] = data
            array_dicts.append(dict_DP)

        dict_[key] = pd.DataFrame(array_dicts,
                                  columns=[
                                      'error_e', 'location', 'segmentation',
                                      'frames', 'label'
                                  ])
    return dict_
def _add_error_columns_ncma(self, df):
    """Derive error_e / error_v / error_m in place from (data_y, data_y_p)."""
    pairs = zip(list(df['data_y']), list(df['data_y_p']))
    df['error_e'] = list(map(self._get_error_cma, pairs))
    df['error_v'] = list(map(self.get_error_vis, df['error_e']))
    df['error_m'] = list(map(self._get_error_m, df['error_v']))
    return df

def _select_best_columns_ncma(self, df):
    """Restrict *df* to the columns persisted in data_best.p."""
    return df[[
        'error_e', 'error_m', 'error_v', 'location', 'segmentation',
        'frames', 'label'
    ]]

def _get_data_no_cma(self, dict_data):
    """Derive error columns for all six splits, score AUCs, persist the
    history and -- when validation AUC improves -- the best-epoch artifacts.

    Args:
        dict_data: dict holding the six train/val/test dataframes plus
            'epoch', 'path_o', 'loss_f_tr' and 'loss_f_v'.

    Returns:
        dict_data rebuilt with AUC/ROC metrics, losses, and the (possibly
        column-reduced) dataframes.

    Refactor notes: the six copy-pasted error-derivation stanzas are now
    one helper applied in a loop; the seven copy-pasted column selections
    are a second helper; the two identical ``AUC_v >= max`` guards are
    merged (save order preserved: df_val.p is written before the columns
    are reduced, exactly as before). Unused locals ``i`` and ``dimension``
    were removed.
    """
    df_t_train = dict_data['df_t_train']
    df_t_val = dict_data['df_t_val']
    df_t_test = dict_data['df_t_test']
    df_f_train = dict_data['df_f_train']
    df_f_val = dict_data['df_f_val']
    df_f_test = dict_data['df_f_test']

    loss_f_tr = dict_data['loss_f_tr']
    loss_f_v = dict_data['loss_f_v']

    self.AUC_max = 0
    self.AUC_min = 100

    # Identical derivation for every split.
    for split in (df_t_train, df_t_val, df_t_test,
                  df_f_train, df_f_val, df_f_test):
        self._add_error_columns_ncma(split)

    df_t_train_val = pd.concat([df_t_train, df_t_val])

    AUC, FPR, TPR = self.get_AUC_score(df_t_train_val['error_m'],
                                       df_f_train['error_m'])
    AUC_v, FPR_v, TPR_v = self.get_AUC_score(df_t_train_val['error_m'],
                                             df_f_val['error_m'])
    AUC_t, FPR_t, TPR_t = self.get_AUC_score(df_t_test['error_m'],
                                             df_f_test['error_m'])

    loss_t_tr, loss_t_v, loss_t_t, loss_f_t, loss_t_v_tr = self._calc_loss(
        df_t_train, df_f_train, df_t_val, df_f_val, df_t_test, df_f_test)

    dict_data = {
        'AUC': AUC,
        'FPR': FPR,
        'TPR': TPR,
        'AUC_v': AUC_v,
        'TPR_v': TPR_v,
        'FPR_v': FPR_v,
        'AUC_t': AUC_t,
        'TPR_t': TPR_t,
        'FPR_t': FPR_t,
        'train_f': loss_f_tr,
        'val_f': loss_f_v,
        'test_f': loss_f_t,
        'train_t': loss_t_tr,
        'val_t': loss_t_v,
        'test_t': loss_t_t,
        'train_val_t': loss_t_v_tr,
        'path_o': dict_data['path_o'],
        'epoch': dict_data['epoch'],
        'df_t_train': df_t_train,
        'df_t_val': df_t_val,
        'df_t_test': df_t_test,
        'df_t_val_train': df_t_train_val,
        'df_f_train': df_f_train,
        'df_f_val': df_f_val,
        'df_f_test': df_f_test,
    }

    path = dict_data['path_o'] + 'hist.p'
    df = pd.DataFrame([dict_data])[[
        'AUC', 'AUC_v', 'AUC_t', 'train_f', 'train_t', 'val_f', 'val_t',
        'test_t', 'test_f', 'train_val_t'
    ]]

    # Create or extend the on-disk AUC/loss history.
    if ('hist.p' not in os.listdir(dict_data['path_o'])):
        pickle_save(path, df)
        df_saved = df
    else:
        df_saved = pickle_load(path, None)
        df_saved = df_saved.append(df, ignore_index=False)
        pickle_save(path, df_saved)

    path_b = dict_data['path_o'] + 'best/'
    if not os.path.exists(path_b):
        os.mkdir(path_b)

    # Persist best-epoch artifacts when validation AUC matches the best seen.
    if dict_data['AUC_v'] >= max(list(df_saved['AUC_v'])):
        # df_val.p keeps the full (unreduced) validation frame, as before.
        pickle_save_(path_b + 'df_val.p', dict_data['df_t_val'])
        for key in ('df_t_train', 'df_t_val', 'df_t_test', 'df_t_val_train',
                    'df_f_train', 'df_f_val', 'df_f_test'):
            dict_data[key] = self._select_best_columns_ncma(dict_data[key])
        pickle_save_(path_b + 'data_best.p', dict_data)

    return dict_data
def get_data_dict(self, dict_, epoch):
    """Score one epoch's splits, append the AUCs to the history, plot it
    every 10 epochs, and persist the best-epoch data when validation AUC
    matches the best seen so far.

    Args:
        dict_: dict holding the six train/val/test dataframes (each with
            an 'error_m' column).
        epoch: current epoch number, stored with the metrics.

    Returns:
        This epoch's validation AUC.

    Refactor note: the six copy-pasted column selections are now a single
    loop over the split keys; behavior is unchanged.
    """
    df_t_train = dict_['df_t_train']
    df_t_val = dict_['df_t_val']
    df_t_test = dict_['df_t_test']
    df_f_train = dict_['df_f_train']
    df_f_val = dict_['df_f_val']
    df_f_test = dict_['df_f_test']

    AUC, FPR, TPR = self.get_AUC_score(df_t_train['error_m'],
                                       df_f_train['error_m'])
    AUC_v, FPR_v, TPR_v = self.get_AUC_score(df_t_val['error_m'],
                                             df_f_val['error_m'])
    AUC_t, FPR_t, TPR_t = self.get_AUC_score(df_t_test['error_m'],
                                             df_f_test['error_m'])

    dict_data = {
        'AUC': AUC,
        'FPR': FPR,
        'TPR': TPR,
        'AUC_v': AUC_v,
        'TPR_v': TPR_v,
        'FPR_v': FPR_v,
        'AUC_t': AUC_t,
        'TPR_t': TPR_t,
        'FPR_t': FPR_t,
        'df_t_train': df_t_train,
        'df_t_val': df_t_val,
        'df_t_test': df_t_test,
        'df_f_train': df_f_train,
        'df_f_val': df_f_val,
        'df_f_test': df_f_test,
        'epoch': epoch
    }

    path = self.dict_c['path_save'] + 'hist.p'
    df = pd.DataFrame([dict_data])[['AUC', 'AUC_v', 'AUC_t']]

    # Create or extend the on-disk AUC history.
    if ('hist.p' not in os.listdir(self.dict_c['path_save'])):
        pickle_save_(path, df)
        df_saved = df
    else:
        df_saved = pickle_load(path, None)
        df_saved = df_saved.append(df, ignore_index=False)
        pickle_save_(path, df_saved)

    if epoch % 10 == 0:
        self.plot(df_saved)

    path_b = self.dict_c['path_save'] + 'best/'
    if not os.path.exists(path_b):
        os.mkdir(path_b)

    if dict_data['AUC_v'] >= max(list(df_saved['AUC_v'])):
        # Keep only the columns needed downstream before saving.
        cols = ['error_e', 'error_m', 'error_v', 'location', 'segmentation',
                'frames', 'label']
        for key in ('df_t_train', 'df_t_val', 'df_t_test',
                    'df_f_train', 'df_f_val', 'df_f_test'):
            dict_data[key] = dict_data[key][cols]
        pickle_save_(path_b + 'data_best.p', dict_data)

    return AUC_v
def save_data(self):
    """Evaluate the best CMA weight vector on val/test and, when the
    validation AUC beats the saved history, persist the best ensemble
    data (per-split error_v / error_m plus ROC curves).

    Refactor note: the twelve copy-pasted
    ``map(functools.partial(...), np.array(...))`` stanzas are now two
    partials applied in one loop over the six split frames; the column
    assignments are independent, so the interleaved order is equivalent.
    """
    AUC, FPR, TPR = self.AUC_tr_max, self.FPR_tr, self.TPR_tr
    AUC_v, FPR_v, TPR_v = self._opt_function_(self.best_x_v,
                                              self.df_f_val['error_e'],
                                              self.df_t_val['error_e'])
    AUC_t, FPR_t, TPR_t = self._opt_function_(self.best_x_v,
                                              self.df_f_test['error_e'],
                                              self.df_t_test['error_e'])

    path = self.dict_c['path_save'] + 'hist.p'
    df_saved = pickle_load(path, None)

    if AUC_v > max(df_saved['AUC_v']):
        ensemble = functools.partial(self._get_error_ensemble,
                                     x=self.best_x_v)
        maximum = functools.partial(self._get_error_max, x=self.best_x_v)

        for split in (self.df_f_train, self.df_f_val, self.df_f_test,
                      self.df_t_train, self.df_t_val, self.df_t_test):
            split['error_v'] = list(map(ensemble,
                                        np.array(split['error_e'])))
            split['error_m'] = list(map(maximum,
                                        np.array(split['error_e'])))

        dict_ = {
            'x_v': self.best_x_v,
            'x_tr': self.best_x_tr,
            'AUC_tr': self.AUC_tr_max,
            'AUC_v': AUC_v,
            'AUC_t': AUC_t,
            'FPR': FPR,
            'TPR': TPR,
            'FPR_v': FPR_v,
            'TPR_v': TPR_v,
            'FPR_t': FPR_t,
            'TPR_t': TPR_t,
            'df_f_train': self.df_f_train,
            'df_f_val': self.df_f_val,
            'df_f_test': self.df_f_test,
            'df_t_train': self.df_t_train,
            'df_t_val': self.df_t_val,
            'df_t_test': self.df_t_test,
        }
        # NOTE(review): the original (whitespace-collapsed) source is
        # ambiguous about whether this save is gated by the AUC check;
        # placed inside to match the save-on-improvement pattern used by
        # the sibling methods -- confirm against the original file.
        self._configure_dir(self.dict_c['path_save'] + 'best')
        pickle_save(self.dict_c['path_save'] + 'best/data_best.p', dict_)