def get_predictions_BATCH(self, itype_list, image_id_list, imgs_list, batch_size=None, verbose=False):
    ''' Predict from a list of imgs (outputs from self.read_image) '''
    for itype, image_id, imgs in zip(itype_list, image_id_list, imgs_list):

        # get weights
        if (self.weights_file is None):
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                weight_file = self.weights_format.format(fold_id=fold_id)
                self.load_weights(weight_file, verbose=False)
                self.prev_foldID = fold_id

        # predict
        pred = self.predict_BATCH(imgs, batch_size=batch_size)

        # Save cache
        if not os.path.exists(os.path.join(self.path_predictions, itype)):
            os.makedirs(os.path.join(self.path_predictions, itype))
        file_to_save = os.path.join(self.path_predictions, itype,
                                    '{}_{}_pred.npy'.format(itype, image_id))
        np.save(file_to_save, pred)
        with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.remove(file_to_save)
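# --- Illustrative usage sketch (not part of the pipeline). The three lists must be aligned;
# --- each element of imgs_list is whatever this model's read_image returns for that video.
# --- 'Model' and the video ids below are placeholders / assumptions.
# itypes = ['train', 'train']
# image_ids = ['video_a', 'video_b']
# imgs_list = [Model.read_image(t, v) for t, v in zip(itypes, image_ids)]
# Model.get_predictions_BATCH(itypes, image_ids, imgs_list, batch_size=16)
# # One gzip-compressed <itype>_<image_id>_pred.npy.gz is written per video under path_predictions.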
def reset_variables(self):
    # Initializations
    self.dsetID = None
    self.Data = ld.FishDATA()
    self.SKmodel = None
    self.output_dir = str(self.Data.path_settings['path_outputs_{}'.format(STAGE)])
    self.stage = STAGE
    self.model_id = MODEL_ID
    self.model_filename_format = '{}_{}_{}_SKmodel'.format(self.stage, self.model_id, '{fold_id}')
    self.path_predictions = os.path.join(self.output_dir, self.model_id)
    self.model_file = None
    self.prev_foldID = None
task = 'TRAIN'
start_time = eval(exe.initiate_task_rA1.func_code)

# READ MODEL
Model = eval(exe.read_model_rA1.func_code)

# READ PYTORCH NNMODEL & PRINT INFO
import torch
eval(exe.read_pytorch_NNmodel_rA1.func_code)

##### COMMON SETTINGS #####

# READ & PREPARE DATASET
print('-' * 40)
print('READING DATASET')
Data = ld.FishDATA()
dsetID = ld.read_dsetID()
imgs_df = dsetID[((dsetID.itype == 'train') & (~dsetID.exclude))]
#imgs_df = pd.merge(imgs_df, Data.annotations, how='inner', on='video_id')
imgs_df = imgs_df.reset_index(drop=True)  # Important to keep consistency with list indexes
imgs_df = imgs_df.assign(idf=imgs_df.index)
imgs_df.rename(columns={'video_id': 'image_id'}, inplace=True)
if debug_mode:
    print(' DEBUG MODE ACTIVATED!!!!')
    imgs_df = imgs_df[0:100]

# READ IMAGES IN MEMORY
print('LOADING IMAGES INTO RAM')
def parallel_function(i_row):
verbose = EXEC_SETTINGS['verbose'] == "True"
debug_mode = EXEC_SETTINGS['debug_mode'] == "True" if FORCE_DEBUG_MODE is False else True
try:
    max_cores = MAX_CORES
except:
    try:
        max_cores = int(EXEC_SETTINGS['max_cores'])
    except:
        max_cores = 99
num_cores = min(multiprocessing.cpu_count(), max_cores)

# SET LOG
orig_stdout = sys.stdout
orig_stderr = sys.stderr
sys.stdout = ld.Logger('{}{}{}_exec.log'.format(str(PATH_SETTINGS['path_log']), LOG_ID, FOLD_ID),
                       orig_stdout)
sys.stdout.log = log
sys.stdout.verbose = True
sys.stderr = sys.stdout

# INITIATE TASK
task = 'TRAIN'
print('')
print('-' * 80)
txt_warning = ' DEBUG MODE ACTIVATED!!!!' if debug_mode else ''
print('{} MODEL: "{}{}" FOLD:{}{}'.format(task, MODEL_ID, EXEC_ID, FOLD_ID, txt_warning))
OUTPUT_DIR = str(PATH_SETTINGS['path_outputs_{}'.format(STAGE)])
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
print('Output directory: {}'.format(OUTPUT_DIR))
def get_predictions(self, itype, image_id, return_imgs=False, avoid_read_weights=False,
                    return_score=False, use_cache=None, force_save=False, verbose=True):
    '''Return frame-level predictions for one video, reading the gzip .npy cache when
    available; optionally also return the images/labels and the R2 score.'''
    start_time_L1 = time.time()
    use_cache = self.Data.exec_settings['cache'] == "True" if use_cache is None else use_cache
    pred = None
    score = None
    score_txt = 'R2_score'

    # Try cache first
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy.gz'.format(itype, image_id))
            with gzip.open(file_to_load, 'rb') as f:
                pred = np.load(f)
            if not return_imgs:
                if verbose:
                    print("Read prediction {}_{} in {:.2f} s".format(
                        itype, image_id, (time.time() - start_time_L1) / 1))
                return pred
        except:
            if verbose:
                print("File not in cache")

    imgs, labels, info = self.read_image(itype, image_id, frame='all', split_wrap_imgs=True,
                                         read_labels=(itype == 'train'), verbose=verbose)

    if pred is None:
        # get weights
        if (self.weights_file is None) or not avoid_read_weights:
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                weight_file = self.weights_format.format(fold_id=fold_id)
                self.load_weights(weight_file, verbose=verbose)
                self.prev_foldID = fold_id

        # predict
        pred = self.predict_BATCH(imgs)

        # Save cache
        if use_cache | force_save:
            if not os.path.exists(os.path.join(self.path_predictions, itype)):
                os.makedirs(os.path.join(self.path_predictions, itype))
            file_to_save = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy'.format(itype, image_id))
            np.save(file_to_save, pred)
            with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(file_to_save)

    # evaluate
    if labels is not None:
        from sklearn.metrics import r2_score
        np_labels = np.vstack(labels)[:, 0]
        np_preds = pred[:, 0]
        y_true = (np_labels[np.logical_not(np.isnan(np_labels))]).astype(np.float32)
        y_pred = (np_preds[np.logical_not(np.isnan(np_labels))]).astype(np.float32)
        score = r2_score(y_true, y_pred)

    if verbose:
        if score is not None:
            print("Read prediction {}_{} ({}: {:.5f}) in {:.2f} s".format(
                itype, image_id, score_txt, score, (time.time() - start_time_L1) / 1))
        else:
            print("Read prediction {}_{} in {:.2f} s".format(
                itype, image_id, (time.time() - start_time_L1) / 1))

    if return_imgs:
        if return_score:
            return pred, imgs, labels, score
        else:
            return pred, imgs, labels
    if return_score:
        return pred, score
    else:
        return pred
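# --- Illustrative, self-contained sketch (not part of the pipeline) of the cache format used
# --- above: predictions are saved as a plain .npy, gzip-compressed next to it, and re-read
# --- with gzip.open + np.load. All paths here are temporary / hypothetical.
def _sketch_npy_gz_cache_roundtrip():
    import gzip
    import os
    import shutil
    import tempfile
    import numpy as np

    pred = np.random.rand(8, 1).astype(np.float32)
    file_to_save = os.path.join(tempfile.mkdtemp(), 'train_video_a_pred.npy')

    np.save(file_to_save, pred)                       # write uncompressed .npy
    with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)               # compress it
    os.remove(file_to_save)                           # keep only the .gz

    with gzip.open(file_to_save + '.gz', 'rb') as f:  # later, on a cache hit
        cached = np.load(f)
    assert np.allclose(cached, pred)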
def get_labels(self, itype, image_id, avoid_read_weights=False, use_cache=None,
               force_save=False, verbose=True):
    pred, img, msk, score = self.get_predictions(itype, image_id, return_img=True,
                                                 avoid_read_weights=avoid_read_weights,
                                                 return_score=True, thr=self.thr,
                                                 use_cache=use_cache, force_save=force_save,
                                                 verbose=verbose)
    vidD = self.Data.load_vidDATA(itype, image_id)

    # Scale prediction back to full-frame resolution
    sPred = pred.astype(np.float32)
    sPred = lt.scale_image(sPred, new_size=(1280, 720), method='linear')

    # get regions
    import skimage.morphology as morph
    from skimage.measure import regionprops
    pred_thr = np.where(sPred >= self.thr, 1.0, 0.0)
    pred_labels = np.array([morph.label(pred_thr[s]) for s in range(pred_thr.shape[0])]).astype(int)
    regions_lst = [regionprops(pred_labels[s]) for s in range(pred_labels.shape[0])]

    # create list
    labels = []
    for ich, regions in enumerate(regions_lst):
        if len(regions) == 0:
            center = (np.nan, np.nan)
            ang = np.nan
        else:
            region = regions[np.argmax([region.area for region in regions])]  # take biggest region
            center = np.round(region.centroid).astype(int)
            ang = np.round(region.orientation * 180 / math.pi).astype(int) + 90

        if (itype == 'train'):
            # Annotations
            df = self.Data.get_S1_target_v2()
            i_row = df[df.video_id == image_id].iloc[0]
            T_center = (i_row.xc, i_row.yc)
            T_ang = i_row.ang

            # Distance to predicted center
            df = self.Data.annotations
            df = df[df.video_id == image_id]
            df = df.dropna(how="any")
            df = df.assign(dx1=np.abs(df.x1.values - center[0]))
            df = df.assign(dx2=np.abs(df.x2.values - center[0]))
            df = df.assign(dy1=np.abs(df.y1.values - center[1]))
            df = df.assign(dy2=np.abs(df.y2.values - center[1]))
            df = df.assign(dist1=np.sqrt(np.power(df.dx1.values, 2) + np.power(df.dy1.values, 2)))
            df = df.assign(dist2=np.sqrt(np.power(df.dx2.values, 2) + np.power(df.dy2.values, 2)))
            df = df.assign(ang1=np.abs((np.arctan((df.dy1) / (df.dx1 + 0.0001)) * 180 / math.pi)))
            df = df.assign(ang2=np.abs((np.arctan((df.dy2) / (df.dx2 + 0.0001)) * 180 / math.pi)))
            max_dist = np.round(np.max(np.array([df.dist1.values, df.dist2.values])))
            max_ang = np.round(np.max(np.array([df.ang1.values, df.ang2.values])))

            center_error = math.ceil(math.sqrt((center[0] - T_center[0]) ** 2 +
                                               (center[1] - T_center[1]) ** 2))
            ang_error = abs(ang - T_ang)

            # evaluate
            if msk is not None:
                score = ld.dice_coef(pred[ich], msk[ich], thr=self.thr)

            labels.append([image_id, ich, center[0], center[1], ang,
                           T_center[0], T_center[1], T_ang, i_row.max_frame,
                           center_error, ang_error, max_dist, max_ang, score])
        else:
            labels.append([image_id, ich, center[0], center[1], ang])

    if (itype == 'train'):
        labels = pd.DataFrame(labels, columns=['image_id', 'ich', 'xc', 'yc', 'ang',
                                               'Txc', 'Tyc', 'Tang', 'max_frame',
                                               'c_error', 'ang_error', 'max_dist', 'max_ang',
                                               'dice_score'])
    else:
        labels = pd.DataFrame(labels, columns=['image_id', 'ich', 'xc', 'yc', 'ang'])

    return labels, pred, img, msk, vidD
def get_predictions(self, itype, image_id, return_img=False, avoid_read_weights=False,
                    return_score=False, thr=0.8, use_cache=None, force_save=False, verbose=True):
    '''Return the segmentation prediction for one video (max over all frames), reading the
    gzip .npy cache when available; optionally also return the image/mask and the dice score.'''
    start_time_L1 = time.time()
    use_cache = self.Data.exec_settings['cache'] == "True" if use_cache is None else use_cache
    score = None
    pred = None

    # Try cache first
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy.gz'.format(itype, image_id))
            with gzip.open(file_to_load, 'rb') as f:
                pred = np.load(f)
            if not return_img:
                if verbose:
                    print("Read prediction {}_{} in {:.2f} s".format(
                        itype, image_id, (time.time() - start_time_L1) / 1))
                return pred, None, None
        except:
            if verbose:
                print("File not in cache")

    imgs, msk, info = self.read_image_PRED(itype, image_id, read_mask=(itype == 'train'),
                                           verbose=verbose)

    if pred is None:
        # get weights
        if (self.weights_file is None) or not avoid_read_weights:
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                weight_file = self.weights_format.format(fold_id=fold_id)
                self.load_weights(weight_file, verbose=verbose)
                self.prev_foldID = fold_id

        # predict
        preds = self.predict_BATCH(imgs)
        pred = np.max(np.array(preds), axis=0)  ##### MAX!!!

        # Save cache
        if use_cache | force_save:
            if not os.path.exists(os.path.join(self.path_predictions, itype)):
                os.makedirs(os.path.join(self.path_predictions, itype))
            file_to_save = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy'.format(itype, image_id))
            np.save(file_to_save, pred)
            with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(file_to_save)

    # evaluate
    if msk is not None:
        score = ld.dice_coef(pred[0], msk[0], thr=thr)

    if verbose:
        if score is not None:
            print("Read prediction {}_{} (score: {:.5f}) in {:.2f} s".format(
                itype, image_id, score, (time.time() - start_time_L1) / 1))
        else:
            print("Read prediction {}_{} in {:.2f} s".format(
                itype, image_id, (time.time() - start_time_L1) / 1))

    if return_img:
        if return_score:
            return pred, imgs[0], msk, score
        else:
            return pred, imgs[0], msk
    if return_score:
        return pred, score
    else:
        return pred
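# --- Illustrative, self-contained sketch (not part of the pipeline) of a standard Dice
# --- coefficient on a thresholded prediction. The repo's ld.dice_coef may differ in details
# --- (smoothing, exact thresholding); treat this only as a reference formulation.
def _sketch_dice_coef(pred, target, thr=0.8, eps=1e-7):
    import numpy as np
    a = (np.asarray(pred) >= thr).astype(np.float32)   # binarize prediction
    b = (np.asarray(target) > 0.5).astype(np.float32)  # binarize ground-truth mask
    inter = (a * b).sum()
    return (2.0 * inter + eps) / (a.sum() + b.sum() + eps)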
def get_predictions(self, itype, image_id, return_imgs=False, avoid_read_weights=False,
                    return_score=False, use_cache=None, force_save=False, verbose=True):
    '''Return a per-channel DataFrame (centre, angle, length) for one video, reading the
    compressed .csv cache when available; optionally also return images/masks and dice score.'''
    start_time_L1 = time.time()
    use_cache = self.Data.exec_settings['cache'] == "True" if use_cache is None else use_cache
    labels = None
    score = None
    pred = None
    score_txt = 'dice_coef'

    # Try cache first
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.csv.gz'.format(itype, image_id))
            labels = pd.read_csv(file_to_load)
            if not return_imgs:
                if verbose:
                    print("Read prediction {}_{} in {:.2f} s".format(
                        itype, image_id, (time.time() - start_time_L1) / 1))
                return labels
        except:
            if verbose:
                print("File not in cache")

    imgs, msks, info = self.read_image(itype, image_id, frame='all', split_wrap_imgs=True,
                                       read_labels=(itype == 'train'), verbose=verbose)

    if labels is None:
        # get weights
        if (self.weights_file is None) or not avoid_read_weights:
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                weight_file = self.weights_format.format(fold_id=fold_id)
                self.load_weights(weight_file, verbose=verbose)
                self.prev_foldID = fold_id

        # predict
        pred = self.predict_BATCH(imgs)

        # scale predictions
        sPred = pred[:, 0, ...].astype(np.float32)
        sPred = lt.scale_image(sPred, new_size=self.pp_patch_size, method='linear')

        # get regions
        import skimage.morphology as morph
        from skimage.measure import regionprops
        pred_thr = np.where(sPred >= self.thr, 1.0, 0.0)
        pred_labels = np.array([morph.label(pred_thr[s])
                                for s in range(pred_thr.shape[0])]).astype(int)
        regions_lst = [regionprops(pred_labels[s]) for s in range(pred_labels.shape[0])]

        # create list
        labels = []
        for ich, regions in enumerate(regions_lst):
            if len(regions) == 0:
                center = (np.nan, np.nan)
                ang = np.nan
                length = np.nan
            else:
                region = regions[np.argmax([region.area for region in regions])]  # take biggest region
                center = np.round(region.centroid).astype(int)
                ang = np.round(region.orientation * 180 / math.pi).astype(int) + 90
                length = int(math.ceil(region.major_axis_length))
            labels.append([image_id, ich, center[0], center[1], ang, length])
        labels = pd.DataFrame(labels, columns=['image_id', 'ich', 'xc', 'yc', 'ang', 'length'])

        # Save cache
        if use_cache | force_save:
            if not os.path.exists(os.path.join(self.path_predictions, itype)):
                os.makedirs(os.path.join(self.path_predictions, itype))
            file_to_save = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.csv.gz'.format(itype, image_id))
            labels.to_csv(file_to_save, index=False, compression='gzip')

    # evaluate
    if (msks is not None) and (pred is not None):
        pp_labels = [self.data_transforms['test'](s1, s1)[1] for s1 in msks]
        select = [np.sum(s1) > 0 for s1 in pp_labels]
        np_labels = [s1 for s1, s2 in zip(pp_labels, select) if s2]
        np_labels = np.vstack(np_labels)
        np_preds = [s1 for s1, s2 in zip(pred, select) if s2]
        np_preds = np.vstack(np_preds)
        score = ld.dice_coef(np_preds, np_labels, thr=self.thr)

    if verbose:
        if score is not None:
            print("Read prediction {}_{} ({}: {:.5f}) in {:.2f} s".format(
                itype, image_id, score_txt, score, (time.time() - start_time_L1) / 1))
        else:
            print("Read prediction {}_{} in {:.2f} s".format(
                itype, image_id, (time.time() - start_time_L1) / 1))

    if return_imgs:
        if return_score:
            return labels, imgs, msks, score
        else:
            return labels, imgs, msks
    if return_score:
        return labels, score
    else:
        return labels
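# --- Illustrative, self-contained sketch (not part of the pipeline): how a thresholded
# --- probability map is reduced to centre / angle / length with skimage, mirroring the
# --- region step in get_predictions above. The synthetic blob below is hypothetical.
def _sketch_mask_to_center_ang_length():
    import math
    import numpy as np
    import skimage.morphology as morph
    from skimage.measure import regionprops

    prob_map = np.zeros((192, 352), dtype=np.float32)
    prob_map[80:110, 100:260] = 0.9                          # an elongated horizontal blob
    thr = 0.5

    mask = prob_map >= thr
    lbl = morph.label(mask)                                  # connected components
    regions = regionprops(lbl)
    region = regions[np.argmax([r.area for r in regions])]   # keep the biggest region

    center = np.round(region.centroid).astype(int)                 # (row, col)
    ang = int(np.round(region.orientation * 180 / math.pi)) + 90   # same +90 convention as above
    length = int(math.ceil(region.major_axis_length))
    return center, ang, length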
def get_predictions_raw(self, itype, image_id, return_imgs=False, avoid_read_weights=False,
                        return_score=False, thr=0.8, use_cache=None, force_save=False,
                        verbose=True):
    '''Return raw per-frame prediction maps for one video, reading the gzip .npy cache when
    available; optionally also return the images/labels and a dice score.'''
    start_time_L1 = time.time()
    use_cache = self.Data.exec_settings['cache'] == "True" if use_cache is None else use_cache
    pred = None
    score = None
    score_txt = 'dice_coef'

    # Try cache first
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy.gz'.format(itype, image_id))
            with gzip.open(file_to_load, 'rb') as f:
                pred = np.load(f)
            if not return_imgs:
                if verbose:
                    print("Read prediction {}_{} in {:.2f} s".format(
                        itype, image_id, (time.time() - start_time_L1) / 1))
                return pred
        except:
            if verbose:
                print("File not in cache")

    imgs, labels, info = self.read_image(itype, image_id, frame='all', split_wrap_imgs=True,
                                         read_labels=(itype == 'train'), verbose=verbose)

    if pred is None:
        # get weights
        if (self.weights_file is None) or not avoid_read_weights:
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                weight_file = self.weights_format.format(fold_id=fold_id)
                self.load_weights(weight_file, verbose=verbose)
                self.prev_foldID = fold_id

        # predict
        pred = self.predict_BATCH(imgs)

        # Save cache
        if use_cache | force_save:
            if not os.path.exists(os.path.join(self.path_predictions, itype)):
                os.makedirs(os.path.join(self.path_predictions, itype))
            file_to_save = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy'.format(itype, image_id))
            np.save(file_to_save, pred)
            with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(file_to_save)

    # evaluate
    if labels is not None:
        pp_labels = [self.data_transforms['test'](s1, s1)[1] for s1 in labels]
        select = [np.sum(s1) > 0 for s1 in pp_labels]
        np_labels = [s1 for s1, s2 in zip(pp_labels, select) if s2]
        np_labels = np.vstack(np_labels)
        np_preds = [s1 for s1, s2 in zip(pred, select) if s2]
        np_preds = np.vstack(np_preds)
        score = ld.dice_coef(np_preds, np_labels, thr=thr)

    if verbose:
        if score is not None:
            print("Read prediction {}_{} ({}: {:.5f}) in {:.2f} s".format(
                itype, image_id, score_txt, score, (time.time() - start_time_L1) / 1))
        else:
            print("Read prediction {}_{} in {:.2f} s".format(
                itype, image_id, (time.time() - start_time_L1) / 1))

    if return_imgs:
        if return_score:
            return pred, imgs, labels, score
        else:
            return pred, imgs, labels
    if return_score:
        return pred, score
    else:
        return pred
def reset_variables(self):
    # Initializations
    self.dsetID = None
    self.Data = ld.FishDATA()
def read_image(self, itype, image_id,
               frame='example',   # int, 'all', 'example'(0)
                                  # 'all_labeled' --> only if training
               read_targets=False, use_cache=None, verbose=False):
    '''Custom read_image function for this model. '''
    start_time_L1 = time.time()
    targets = None

    # Read image.
    vidD = self.Data.load_vidDATA(itype, image_id)

    # Read annotations
    df = self.Data.annotations
    mini_df = df[df.video_id == image_id]
    mini_df = mini_df[np.logical_not(np.isnan(mini_df.fish_number))]
    nb_frames = len(mini_df)

    # Create frames list
    if frame == 'all':
        frames = range(len(vidD.vi))
    elif frame == 'example':
        frames = [0, ]
    elif frame == 'all_labeled' and nb_frames > 0:
        frames = mini_df.frame.values.tolist()
    else:
        frames = [int(frame), ]

    # Start df
    cols = {}
    cols['itype'] = itype
    cols['image_id'] = image_id

    # Ax, no-fish prediction
    Model = self.PREV_Model[0]
    pred = Model.get_predictions(itype, image_id, return_imgs=False, use_cache=use_cache,
                                 verbose=verbose)
    pred = pred[:, 0]
    cols['frame'] = range(pred.shape[0])
    cols['frames_left'] = pred.shape[0] - np.arange(pred.shape[0])
    cols['AxPred'] = pred
    cols['AxM3'] = ld.mov_avg(pred, 1, 1)
    cols['AxM5'] = ld.mov_avg(pred, 2, 2)
    cols['AxM3i1'] = ld.mov_avg(pred, 3, 1)
    cols['AxM1i3'] = ld.mov_avg(pred, 1, 3)
    cols['AxM11'] = ld.mov_avg(pred, 5, 5)
    cols['AxM7i3'] = ld.mov_avg(pred, 7, 3)
    cols['AxM3i7'] = ld.mov_avg(pred, 3, 7)
    cols['AxM5max'] = ld.mov_func(pred, 2, 2, np.max)
    cols['AxM5min'] = ld.mov_func(pred, 2, 2, np.min)
    cols['AxM5std'] = ld.mov_func(pred, 2, 2, np.std)
    cols['AxM0i5max'] = ld.mov_func(pred, 0, 5, np.max)
    cols['AxM0i5min'] = ld.mov_func(pred, 0, 5, np.min)
    cols['AxM0i5std'] = ld.mov_func(pred, 0, 5, np.std)
    cols['AxM5i0max'] = ld.mov_func(pred, 5, 0, np.max)
    cols['AxM5i0min'] = ld.mov_func(pred, 5, 0, np.min)
    cols['AxM5i0std'] = ld.mov_func(pred, 5, 0, np.std)

    # Dx, best frame prediction
    Model = self.PREV_Model[1]
    pred = Model.get_predictions(itype, image_id, return_imgs=False, use_cache=use_cache,
                                 verbose=verbose)
    pred = pred[:, 0]
    cols['DxPred'] = pred
    cols['DxP1'] = np.concatenate([np.repeat(pred[0], 1), pred[:-1]])
    cols['DxP2'] = np.concatenate([np.repeat(pred[0], 2), pred[:-2]])
    cols['DxP3'] = np.concatenate([np.repeat(pred[0], 3), pred[:-3]])
    cols['DxN1'] = np.concatenate([pred[1:], np.repeat(pred[-1], 1)])
    cols['DxN2'] = np.concatenate([pred[2:], np.repeat(pred[-1], 2)])
    cols['DxN3'] = np.concatenate([pred[3:], np.repeat(pred[-1], 3)])
    cols['DxM3'] = ld.mov_avg(pred, 1, 1)
    cols['DxM5'] = ld.mov_avg(pred, 2, 2)
    cols['DxM3i1'] = ld.mov_avg(pred, 3, 1)
    cols['DxM1i3'] = ld.mov_avg(pred, 1, 3)

    # Fx, 4 frames after best frame prediction
    Model = self.PREV_Model[2]
    pred = Model.get_predictions(itype, image_id, return_imgs=False, use_cache=use_cache,
                                 verbose=verbose)
    pred = pred[:, 0]
    cols['FxPred'] = pred
    cols['FxP1'] = np.concatenate([np.repeat(pred[0], 1), pred[:-1]])
    cols['FxP2'] = np.concatenate([np.repeat(pred[0], 2), pred[:-2]])
    cols['FxP3'] = np.concatenate([np.repeat(pred[0], 3), pred[:-3]])
    cols['FxP4'] = np.concatenate([np.repeat(pred[0], 4), pred[:-4]])
    cols['FxP5'] = np.concatenate([np.repeat(pred[0], 5), pred[:-5]])
    cols['FxP6'] = np.concatenate([np.repeat(pred[0], 6), pred[:-6]])
    cols['FxM3'] = ld.mov_avg(pred, 1, 1)
    cols['FxM5'] = ld.mov_avg(pred, 2, 2)
    cols['FxM5i1'] = ld.mov_avg(pred, 5, 1)
    cols['FxM1i5'] = ld.mov_avg(pred, 1, 5)

    # Data Frame
    dset_df = pd.DataFrame(cols)
    init_dset = dset_df.loc[:, [s for s in dset_df.columns
                                if s not in ['itype', 'image_id', 'frames_left', 'target']]].copy()
    tmp_dset = init_dset.copy()
    tmp_dset['frame'] = tmp_dset['frame'].values - 5
    tmp_dset.columns = [s + '_p5' if s != 'frame' else s for s in tmp_dset.columns]
    dset_df = pd.merge(dset_df, tmp_dset, how='left', on='frame')

    tmp_dset = init_dset.copy()
    tmp_dset['frame'] = tmp_dset['frame'].values - 10   # shift by 10 frames for the *_p10 features
    tmp_dset.columns = [s + '_p10' if s != 'frame' else s for s in tmp_dset.columns]
    dset_df = pd.merge(dset_df, tmp_dset, how='left', on='frame')

    tmp_dset = init_dset.copy()
    tmp_dset['frame'] = tmp_dset['frame'].values + 5
    tmp_dset.columns = [s + '_n5' if s != 'frame' else s for s in tmp_dset.columns]
    dset_df = pd.merge(dset_df, tmp_dset, how='left', on='frame')

    tmp_dset = init_dset.copy()
    tmp_dset['frame'] = tmp_dset['frame'].values + 10   # shift by 10 frames for the *_n10 features
    tmp_dset.columns = [s + '_n10' if s != 'frame' else s for s in tmp_dset.columns]
    dset_df = pd.merge(dset_df, tmp_dset, how='left', on='frame')

    # Add target
    if read_targets:
        targets = mini_df[['frame']]
        targets = targets.assign(target=1)
    if targets is None:
        dset_df = dset_df.assign(target=0)
    else:
        dset_df = pd.merge(dset_df, targets, how='left', on='frame')
        dset_df = dset_df.assign(target=np.nan_to_num(dset_df.target.values))

    # Convert infinites and nans
    dset_df = dset_df.replace([np.inf, -np.inf, np.nan], 0)

    # Filter frames
    dset_df = dset_df.iloc[[s in frames for s in dset_df.frame], :]

    if verbose:
        print("Read image {} dataset in {:.2f} s".format(
            image_id, (time.time() - start_time_L1) / 1))

    return dset_df
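# --- Illustrative, self-contained sketch (not part of the pipeline) of the lag-feature trick
# --- used in read_image above: shifting the 'frame' key before a left merge attaches each
# --- frame's features to a row a few frames away. The data below is hypothetical.
def _sketch_frame_shift_merge():
    import numpy as np
    import pandas as pd

    init_dset = pd.DataFrame({'frame': np.arange(10), 'AxPred': np.random.rand(10)})
    dset_df = init_dset.copy()

    tmp = init_dset.copy()
    tmp['frame'] = tmp['frame'].values - 5                    # frame f is re-keyed as f - 5
    tmp.columns = [c + '_p5' if c != 'frame' else c for c in tmp.columns]
    dset_df = pd.merge(dset_df, tmp, how='left', on='frame')  # row f now carries AxPred of f + 5
    return dset_df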
def get_predictions(self, itype, image_id, return_dset=False, avoid_read_model=False,
                    return_score=False, use_cache=None, force_save=False, verbose=True):
    '''Return per-frame predictions from the fitted sklearn model, reading the gzip .npy cache
    when available; optionally also return the dataset/labels and the log-loss score.'''
    start_time_L1 = time.time()
    use_cache = self.Data.exec_settings['cache'] == "True" if use_cache is None else use_cache
    pred = None
    score = None
    score_txt = 'log_loss'

    # Try cache first
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy.gz'.format(itype, image_id))
            with gzip.open(file_to_load, 'rb') as f:
                pred = np.load(f)
            if not return_dset:
                if verbose:
                    print("Read prediction {}_{} in {:.2f} s".format(
                        itype, image_id, (time.time() - start_time_L1) / 1))
                return pred
        except:
            if verbose:
                print("File not in cache")

    dset = self.read_image(itype, image_id, frame='all', read_targets=(itype == 'train'),
                           verbose=verbose)

    if pred is None:
        # get model
        if (self.model_file is None) or not avoid_read_model:
            self.dsetID = ld.read_dsetID() if self.dsetID is None else self.dsetID
            fold_id = self.dsetID.loc[(self.dsetID.video_id == image_id) &
                                      (self.dsetID.itype == itype), self.fold_column]
            fold_id = fold_id.values[0]
            if self.prev_foldID != fold_id:
                model_filename = self.model_filename_format.format(fold_id=fold_id)
                self.load_model(model_filename, verbose=verbose)
                self.prev_foldID = fold_id

        # predict
        pred = self.predict(dset)

        # Save cache
        if use_cache | force_save:
            if not os.path.exists(os.path.join(self.path_predictions, itype)):
                os.makedirs(os.path.join(self.path_predictions, itype))
            file_to_save = os.path.join(self.path_predictions, itype,
                                        '{}_{}_pred.npy'.format(itype, image_id))
            np.save(file_to_save, pred)
            with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(file_to_save)

    # evaluate
    labels = None
    if (itype == 'train'):
        labels = dset['target'].values.astype(np.float32)
        from sklearn.metrics import log_loss
        y_true = labels[~np.isnan(labels)]
        y_pred = pred[~np.isnan(labels)]
        score = log_loss(y_true, y_pred)

    if verbose:
        if score is not None:
            print("Read prediction {}_{} ({}: {:.5f}) in {:.2f} s".format(
                itype, image_id, score_txt, score, (time.time() - start_time_L1) / 1))
        else:
            print("Read prediction {}_{} in {:.2f} s".format(
                itype, image_id, (time.time() - start_time_L1) / 1))

    if return_dset:
        if return_score:
            return pred, dset, labels, score
        else:
            return pred, dset, labels
    if return_score:
        return pred, score
    else:
        return pred
def ppS1B(itype, video_id, Data=None, use_cache=True, force_save=False,
          cache_only_training=True, verbose=False):
    """ Read pre-processed images from cache, building and caching them if needed """
    start_time_L1 = time.time()

    # Parameters
    pp_file_name = 'S1B'
    final_size = (352, 192)
    fps = 1

    # DATA class
    Data = ld.FishDATA() if Data is None else Data

    # Read cache
    if cache_only_training and itype != 'train':
        use_cache = False
        force_save = False
    if use_cache & (not force_save):
        try:
            file_to_load = os.path.join(Data.pp_data, '{}_{}'.format(pp_file_name, itype),
                                        '{}.npy.gz'.format(video_id))
            with gzip.open(file_to_load, 'rb') as f:
                imgs = np.load(f)
            if verbose:
                print("Read video {}_{}_{} in {:.2f} s".format(
                    pp_file_name, itype, video_id, (time.time() - start_time_L1) / 1))
            return imgs
        except:
            if verbose:
                print("File not in cache")

    # Load video
    vidD = Data.load_vidDATA(itype, video_id)

    # Get video metadata
    nb_frames = len(vidD.vi)
    vi_fps = vidD.vi._meta['fps']

    # extract frames
    imgs = []
    for i in range(0, nb_frames, int(vi_fps / float(fps))):
        # Extract frame
        img = vidD.vi.get_data(i)
        # Convert to B&W
        im = Image.fromarray(img)
        im = im.convert('L')
        # Resize
        im = im.resize(final_size, Image.BICUBIC)
        # Convert to np.array
        img = np.transpose(np.array(im), (1, 0))
        img = img[np.newaxis, ...]
        imgs.append(img)
    imgs = np.array(imgs)

    # Save Images
    if use_cache | force_save:
        outputdir = os.path.join(Data.pp_data, '{}_{}'.format(pp_file_name, itype))
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)
        file_to_save = os.path.join(outputdir, '{}.npy'.format(video_id))
        np.save(file_to_save, imgs)
        with open(file_to_save, 'rb') as f_in, gzip.open(file_to_save + '.gz', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.remove(file_to_save)

    # Return images
    if verbose:
        print("Read video {}_{}_{} in {:.2f} s".format(
            pp_file_name, itype, video_id, (time.time() - start_time_L1) / 1))
    return imgs
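# --- Hypothetical usage sketch for ppS1B (video id is a placeholder). With the defaults above,
# --- the returned array should have shape (n_sampled_frames, 1, 352, 192): one greyscale frame
# --- per second, transposed to (width, height).
# imgs = ppS1B('train', 'some_video_id', use_cache=True, verbose=True)
# print(imgs.shape, imgs.dtype)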
def ppPCH01(itype, video_id, frame, Data=None, vidD=None, S1_Model_DF=None, use_cache=True,
            force_save=False, cache_only_training=True, verbose=False):
    """ Read pre-processed patches from cache, building and caching them if needed """
    start_time_L1 = time.time()

    # Parameters
    S1_STAGE, S1_MODEL_ID = 'S1_ROI', 'NN_AxC01'
    pp_file_name = 'PCH01'
    init_patch_size = (448, 224)

    # DATA class
    Data = ld.FishDATA() if Data is None else Data

    # Filename
    outputdir = os.path.join(Data.pp_data, '{}_{}'.format(pp_file_name, itype))
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)
    filename = os.path.join(outputdir, '{}_{}.jpg'.format(video_id, frame))

    # Read cache
    if cache_only_training and itype != 'train':
        use_cache = False
        force_save = False
    if use_cache & (not force_save):
        try:
            patch = Image.open(filename)
            if verbose:
                print("Read patch {}_{}_{} in {:.2f} s".format(
                    pp_file_name, itype, video_id, (time.time() - start_time_L1) / 1))
            return patch
        except:
            if verbose:
                print("File not in cache")

    # VIDEO class
    vidD = Data.load_vidDATA(itype, video_id) if vidD is None else vidD

    # BBOX
    if S1_Model_DF is None:
        S1_src_file = 'src/{}_models/{}_{}_model.py'.format(S1_STAGE, S1_STAGE, S1_MODEL_ID)
        S1_Model = imp.load_source('', S1_src_file).Model('test')
        s1_pred_file = os.path.join(S1_Model.path_predictions,
                                    '{}_{}_pred.csv.gz'.format(S1_STAGE, S1_MODEL_ID))
        try:
            S1_Model_DF = pd.read_csv(s1_pred_file)
        except:
            S1_Model_DF = None
    try:
        bbox_df = S1_Model_DF
        bbox_df = bbox_df[bbox_df.image_id == video_id]
        bbox_df = bbox_df[bbox_df.ich == 0]
        bbox_irow = bbox_df.iloc[0]
    except:
        bbox_df, _, _, _, _ = S1_Model.get_labels(itype, video_id, use_cache=use_cache,
                                                  verbose=verbose)
        bbox_df = bbox_df[bbox_df.image_id == video_id]
        bbox_df = bbox_df[bbox_df.ich == 0]
        bbox_irow = bbox_df.iloc[0]
    xc, yc, ang = int(bbox_irow.xc), int(bbox_irow.yc), int(bbox_irow.ang)

    # Extract patches
    patch = Data.extract_patch_PIL(Image.fromarray(vidD.vi.get_data(frame)), (xc, yc), ang,
                                   size=init_patch_size)

    # Save Images
    if use_cache | force_save:
        patch.save(filename, quality=95, optimize=True)

    # Return images
    if verbose:
        print("Read patch {}_{}_{} in {:.2f} s".format(
            pp_file_name, itype, video_id, (time.time() - start_time_L1) / 1))
    return patch
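# --- Hypothetical usage sketch for ppPCH01 (video id and frame are placeholders): extracts,
# --- and caches as JPEG, one rotated patch around the S1 ROI prediction for the given frame.
# patch = ppPCH01('train', 'some_video_id', frame=0, use_cache=True, verbose=True)
# patch.size   # PIL.Image patch cropped around the predicted ROI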
def read_image(self, itype, image_id,
               frame='example',   # int, 'all', 'example'(0)
                                  # 'all_labeled' --> only if training
                                  # 'all_train' --> only if training
               read_targets=False, use_cache=None, verbose=False):
    '''Custom read_image function for this model. '''
    start_time_L1 = time.time()
    targets = None

    # Read image.
    vidD = self.Data.load_vidDATA(itype, image_id)

    # Read annotations
    df = self.Data.annotations
    mini_df = df[df.video_id == image_id]
    mini_df = mini_df[np.logical_not(np.isnan(mini_df.fish_number))]
    nb_frames = len(mini_df)

    # Create frames list
    if frame == 'all':
        frames = range(len(vidD.vi))
    elif frame == 'example':
        frames = [0, ]
    elif frame == 'all_labeled' and nb_frames > 0:
        frames = mini_df.frame.values.tolist()
    elif frame == 'all_train' and nb_frames > 0:
        i_frames = mini_df.frame.values.tolist()
        frames = [[s, ] for s in i_frames]
        frames = [s for ss in frames for s in ss]
        frames = np.unique(np.clip(frames, 0, len(vidD.vi) - 1))
    else:
        frames = [int(frame), ]

    # Start df
    cols = {}
    cols['itype'] = itype
    cols['image_id'] = image_id

    # Bx, fish_length
    i_model = -1
    bx_models = 2
    for i in range(bx_models):
        i_model += 1
        Model = self.PREV_Model[i_model]
        pred = Model.get_predictions(itype, image_id, return_imgs=False, use_cache=use_cache,
                                     verbose=verbose)
        pred = pred[:, 0]
        cols['Bx{}_{}Pred'.format(i_model, i)] = pred
        cols['Bx{}_{}M3'.format(i_model, i)] = ld.mov_avg(pred, 1, 1)
        cols['Bx{}_{}M5'.format(i_model, i)] = ld.mov_avg(pred, 2, 2)
        cols['Bx{}_{}M11'.format(i_model, i)] = ld.mov_avg(pred, 5, 5)
        cols['Bx{}_{}M0i3max'.format(i_model, i)] = ld.mov_func(pred, 0, 3, np.nanmax)
        cols['Bx{}_{}M3i0max'.format(i_model, i)] = ld.mov_func(pred, 3, 0, np.nanmax)
        cols['Bx{}_{}M0i5max'.format(i_model, i)] = ld.mov_func(pred, 0, 5, np.nanmax)
        cols['Bx{}_{}M5i0max'.format(i_model, i)] = ld.mov_func(pred, 5, 0, np.nanmax)
        cols['Bx{}_{}M0i11max'.format(i_model, i)] = ld.mov_func(pred, 0, 11, np.nanmax)
        cols['Bx{}_{}M11i0max'.format(i_model, i)] = ld.mov_func(pred, 11, 0, np.nanmax)

    # Bx, fish_bbox
    i_model += 1
    Model = self.PREV_Model[i_model]
    pred = Model.get_predictions(itype, image_id, return_imgs=False, use_cache=use_cache,
                                 verbose=verbose)
    pred = pred.iloc[:, 4:].values
    for i in range(pred.shape[1]):
        cols['Bx{}_{}Pred'.format(i_model, i)] = pred[:, i]
        cols['Bx{}_{}M3'.format(i_model, i)] = ld.mov_avg(pred[:, i], 1, 1)
        cols['Bx{}_{}M5'.format(i_model, i)] = ld.mov_avg(pred[:, i], 2, 2)
        cols['Bx{}_{}M11'.format(i_model, i)] = ld.mov_avg(pred[:, i], 5, 5)

    # Data Frame
    dset_df = pd.DataFrame(cols)

    # Convert infinites and nans
    dset_df = dset_df.replace([np.inf, -np.inf, np.nan], 0)

    # Add features
    dset_df = dset_df.assign(frame=np.arange(len(dset_df)))
    dset_df = dset_df.assign(sample_weight=1 / float(len(dset_df)))
    dset_df = dset_df.assign(frames_left=len(dset_df) - dset_df.frame.values)

    # Add target
    if read_targets:
        targets = mini_df[['frame', 'length']]
    if targets is not None:
        dset_df = pd.merge(dset_df, targets, how='left', on='frame')

    # Filter frames
    dset_df = dset_df.iloc[[s in frames for s in dset_df.frame], :]

    if verbose:
        print("Read image {} dataset in {:.2f} s".format(
            image_id, (time.time() - start_time_L1) / 1))

    return dset_df
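# --- Hypothetical usage sketch (video id is a placeholder): build the per-frame feature table
# --- for one training video, including the 'length' target merged from the annotations.
# dset_df = Model.read_image('train', 'some_video_id', frame='all', read_targets=True, verbose=True)
# dset_df[['frame', 'length']].head()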