def _load_train_original_coordinates(self):
    """
    Load the coordinates CSV for the training dataset.

    Rows of ``settings.TRAIN_COORDINATES_CSV`` are grouped per image. The
    grouping key is ``utils.get_file_name_part(row['filename'])`` and every
    stored row first goes through
    ``utils.remove_key_from_dict(row, '', 'id', 'filename')``.

    {image_id: [
        {
            x_coord: n
            y_coord: n
            category: label
        }
    ]
    }

    :return: A dictionary of lists of coordinates and categories.
    """
    logger.debug('Loading train image coordinates')
    grouped = collections.defaultdict(list)
    # newline='' is how the csv module asks for its input files to be opened,
    # so that line endings inside quoted fields are interpreted by the reader.
    with open(settings.TRAIN_COORDINATES_CSV, 'r', newline='') as file:
        for row in csv.DictReader(file):
            # The key is computed before the row is handed to
            # remove_key_from_dict, in case that helper edits the row in place.
            image_id = utils.get_file_name_part(row['filename'])
            grouped[image_id].append(
                utils.remove_key_from_dict(row, '', 'id', 'filename'))
    # Hand back a plain dict so that later lookups of unknown ids do not
    # silently create empty entries.
    return dict(grouped)
def _load_sea_lion_crop_images(self):
    """
    Collect the sea lion crop images stored in ``settings.SEA_LION_CROPS_DIR``.

    Crop file names look like::

        adult_males_id923_1clions_at_1944-1425_197px
        negative_id0_0clions_at_13-913_197px

    The text before ``_id`` is used as the class label and the token that
    directly follows ``_id`` is the name of the original image. Crops whose
    original image is listed in ``self.train_original_mismatched`` are left
    out.

    :return: A list of dicts with the keys 'x' (zero-argument callable that
             calls ``self.load`` on the crop path), 'm' (meta dict with
             'full_name' and 'filename') and 'y' (class label).
    """

    def make_loader(path):
        # Bind the path now; the image itself is only loaded on demand.
        return lambda: self.load(path)

    pattern = os.path.join(settings.SEA_LION_CROPS_DIR, "*.jpg")
    collected = []
    for crop_path in sorted(glob.glob(pattern)):
        full_name = utils.get_file_name_part(crop_path)
        pieces = full_name.split('_id')
        label = pieces[0]
        original_name = pieces[1].partition('_')[0]
        if original_name in self.train_original_mismatched:
            # Original image is marked as mismatched
            continue
        collected.append({
            'x': make_loader(crop_path),
            'm': {'full_name': full_name, 'filename': original_name},
            'y': label,
        })
    return collected
def generate_obms_fast2():
    """
    Run ``generate_obms2`` over the training heatmap crops in parallel.

    Every entry of ``settings.TRAIN_HEATMAP_DIR`` is considered. The number
    that precedes ``clions`` in the file name is read as the crop's count;
    crops with a count of 0 are randomly dropped (5 times out of 7 on
    average). The remaining paths are split into contiguous chunks, one task
    per chunk, and mapped over a process pool.
    """
    import random

    cpus = 20

    def chunks(l, n):
        """
        Split ``l`` into at most ``n`` contiguous, non-empty slices whose
        lengths differ by at most one. Every item lands in exactly one slice.
        """
        base, extra = divmod(len(l), n)
        parts = []
        start = 0
        for index in range(n):
            # The first `extra` slices take one additional item each.
            stop = start + base + (1 if index < extra else 0)
            if stop > start:
                parts.append(l[start:stop])
            start = stop
        return parts

    lo = sorted(glob.glob(os.path.join(settings.TRAIN_HEATMAP_DIR, '*')))
    print(len(lo))
    impaths = []
    for impath in lo:
        filename = utils.get_file_name_part(impath)
        nclions = int(filename.split('clions')[0])
        # random.choice yields a truthy value for 5 of the 7 entries, so
        # roughly 5/7 of the zero-count crops are skipped.
        if nclions == 0 and random.choice([0, 0, 1, 1, 1, 1, 1]):
            continue
        impaths.append(impath)
    print(len(impaths))
    impaths = chunks(np.array(impaths), cpus)
    print(999, len(impaths))
    for a in impaths:
        print(len(a))
        print(str(a)[:500])
    if not impaths:
        # Nothing to process; avoid spawning workers for no work.
        return
    # zip() wraps each chunk in a 1-tuple, which starmap unpacks into the
    # single `impaths` argument of generate_obms2.
    with Pool(min(cpus, len(impaths))) as pool:
        pool.starmap(generate_obms2, zip(impaths))
def plot_count_crops_statistics(self, crops_folder):
    """
    Plot the sorted per-crop counts of the ``*.jpg`` files in a folder.

    The count of a crop is the integer that precedes ``clions`` in its file
    name. Two figures are shown one after the other: all counts, and the
    counts with the zeros removed. A further empty figure is opened at the
    end.

    :param crops_folder: Directory containing the crop images.
    """
    crop_paths = glob.glob(os.path.join(crops_folder, '*.jpg'))
    all_counts = sorted(
        int(utils.get_file_name_part(path).split('clions')[0])
        for path in crop_paths)

    plt.figure()
    plt.plot(all_counts)
    plt.title("Including negative crops")
    plt.show()

    non_zero_counts = [value for value in all_counts if value != 0]
    plt.figure()
    plt.plot(non_zero_counts)
    plt.title("Excluding negative crops")
    plt.show()

    plt.figure()
def _load_region_crop_images(self):
    """
    Collect the region crops from the ``pos`` and ``neg`` sub-directories of
    ``settings.REGION_CROPS_DIR`` (positives first, each group sorted).

    Crop file names look like::

        10clions_at40-1680_in66_400px
        0clions_at0-2471_in579_400px

    From the name are taken: the count (integer before ``clions``), the crop
    coordinates (second ``_``-separated token without its first two
    characters) and the original image name (text between the first ``in``
    and the next ``_``). Crops of images listed in
    ``self.train_original_mismatched`` are left out.

    :return: A list of dicts with the keys 'x' (zero-argument callable that
             calls ``self.load`` on the crop path), 'm' (meta dict) and 'y'
             ('positive' or 'negative', from the parent directory name).
    """

    def make_loader(path):
        # Bind the path now; the image itself is only loaded on demand.
        return lambda: self.load(path)

    crops_dir = settings.REGION_CROPS_DIR
    crop_paths = []
    for subdir in ('pos', 'neg'):
        crop_paths += sorted(glob.glob(os.path.join(crops_dir, subdir, "*.jpg")))

    collected = []
    for crop_path in crop_paths:
        full_name = utils.get_file_name_part(crop_path)
        original_name = full_name.split('in')[1].split('_')[0]
        if original_name in self.train_original_mismatched:
            # Original image is marked as mismatched
            continue

        folder = crop_path.split(os.path.sep)[-2]
        assert folder in ['pos', 'neg']
        label = 'positive' if folder == 'pos' else 'negative'

        collected.append({
            'x': make_loader(crop_path),
            'm': {
                'full_name': full_name,
                'filename': original_name,
                'coordinates': full_name.split('_')[1][2:],
                'counts': int(full_name.split('clions')[0]),
            },
            'y': label,
        })
    return collected
def load_test_coordinates(self):
    """
    Load the coordinates CSV for the test dataset.

    Rows of ``settings.TEST_COORDINATES_CSV`` are grouped per image. The
    grouping key is ``utils.get_file_name_part(row['filename'])`` and every
    stored row first goes through
    ``utils.remove_key_from_dict(row, '', 'filename')``.

    {image_id: [
        {
            x_coord: n
            y_coord: n
        }
    ]
    }

    :return: A dictionary of lists of coordinates.
    """
    logger.debug('Loading test image coordinates')
    grouped = collections.defaultdict(list)
    # newline='' is how the csv module asks for its input files to be opened,
    # so that line endings inside quoted fields are interpreted by the reader.
    with open(settings.TEST_COORDINATES_CSV, 'r', newline='') as file:
        for row in csv.DictReader(file):
            # The key is computed before the row is handed to
            # remove_key_from_dict, in case that helper edits the row in place.
            image_id = utils.get_file_name_part(row['filename'])
            grouped[image_id].append(
                utils.remove_key_from_dict(row, '', 'filename'))
    # Hand back a plain dict so that later lookups of unknown ids do not
    # silently create empty entries.
    return dict(grouped)
def generate_overlap_masks():
    """
    Write one boolean mask per dotted training image.

    For every ``*.jpg`` in ``settings.TRAIN_DOTTED_IMAGES_DIR`` whose name is
    not listed in the ``train_id`` column of ``settings.TRAIN_MISMATCHED_CSV``,
    the image is read, summed over its third axis, and the positions where
    that sum is greater than zero are marked True. The mask is pickled into a
    gzip file named ``<name>.mask`` inside ``settings.OVERLAP_MASKS_DIR``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(settings.OVERLAP_MASKS_DIR, exist_ok=True)

    with open(settings.TRAIN_MISMATCHED_CSV, 'r') as file:
        mismatched = {row['train_id'] for row in csv.DictReader(file)}

    filenames = sorted(
        glob.glob(os.path.join(settings.TRAIN_DOTTED_IMAGES_DIR, "*.jpg")))
    for n, filename in enumerate(filenames):
        # Logged for every file, including the mismatched ones skipped below.
        logger.debug('Generating overlap mask for image %s ...', n)
        name = utils.get_file_name_part(filename)
        if name in mismatched:
            continue

        img = scipy.misc.imread(filename).astype("float32")
        mask = np.sum(img, 2) > 0

        maskname = os.path.join(settings.OVERLAP_MASKS_DIR, name + '.mask')
        with gzip.open(maskname, 'wb') as outfile:
            pickle.dump(mask, outfile)
def generate_obms2(impaths):
    """
    Predict a map for each image in ``impaths`` and save it as ``.npy``.

    A ``network.TransferLearningSeaLionHeatmap`` is created, built with the
    'xception' architecture and loaded with a fixed weights file. Each image
    is read, divided by its maximum value, given a leading batch axis and
    passed to ``tl.model.predict``. The prediction is written with ``np.save``
    to ``<settings.OBMS_OUTPUT_DIR>/train/<file name part>_obm_train``.

    :param impaths: Iterable of image paths.
    """
    data_type = 'original_test'
    prediction_class_type = 'odm'
    validate = False
    class_balancing = False
    input_shape = (224, 224, 3)
    batch_size = 1
    arch = 'xception'
    input_weights_name = 'xception-lay106-heatmap_crops-ep011-tloss0.0068-vloss0.0067.hdf5'

    tl = network.TransferLearningSeaLionHeatmap(
        data_type=data_type,
        input_shape=input_shape,
        prediction_class_type=prediction_class_type,
        class_balancing=class_balancing,
        mini_batch_size=batch_size,
        validate=validate)
    tl.build(arch, input_shape=input_shape)
    tl.load_weights(input_weights_name)

    # This function may run in several worker processes at once; exist_ok
    # keeps the directory creation safe in that situation.
    output_dir = os.path.join(settings.OBMS_OUTPUT_DIR, 'train')
    os.makedirs(output_dir, exist_ok=True)

    for impath in impaths:
        im = scipy.misc.imread(impath)
        # Guard the normalisation: an all-zero image would otherwise turn
        # into NaNs (generate_obms applies the same guard to its crops).
        peak = im.max()
        im = im / peak if peak > 0 else im.astype('float64')
        im = np.expand_dims(im, axis=0)
        obm = tl.model.predict(im)
        filename = utils.get_file_name_part(impath)
        print(im.shape, im.mean(), obm.shape, obm.mean(), filename)
        np.save(os.path.join(output_dir, filename + '_obm_train'), obm)
def load_original_images(self, dataset="train"):
    """
    List the original images of a dataset together with their metadata.

    For ``"train"`` the ``*.jpg`` files of
    ``settings.TRAIN_ORIGINAL_IMAGES_DIR`` are used, images listed in
    ``self.train_original_mismatched`` are left out, and the metadata holds
    'filename', 'coordinates' (``[]`` when the image has no entry in
    ``self.train_original_coordinates``) and 'counts'.
    For ``"test_st1"`` the ``*.jpg`` files of
    ``settings.TEST_ORIGINAL_IMAGES_DIR`` are used and the metadata only
    holds 'filename'.
    Any other value yields an empty list.

    :param dataset: "train" or "test_st1".
    :return: A list of dicts with the keys 'x' (zero-argument callable that
             calls ``self.load`` on the image path) and 'm' (metadata).
    """

    def make_loader(path):
        # Bind the path now; the image itself is only loaded on demand.
        return lambda: self.load(path)

    collected = []

    if dataset == "train":
        logger.debug('Loading train set original images')
        pattern = os.path.join(settings.TRAIN_ORIGINAL_IMAGES_DIR, "*.jpg")
        for image_path in sorted(glob.glob(pattern)):
            name = utils.get_file_name_part(image_path)
            if name in self.train_original_mismatched:
                # Image is marked as mismatched
                continue
            if name in self.train_original_coordinates:
                coordinates = self.train_original_coordinates[name]
            else:
                coordinates = []
            info = {
                'filename': name,
                'coordinates': coordinates,
                'counts': self.train_original_counts[name],
            }
            collected.append({'x': make_loader(image_path), 'm': info})

    elif dataset == "test_st1":
        logger.debug('Loading stage 1 test set original images')
        pattern = os.path.join(settings.TEST_ORIGINAL_IMAGES_DIR, "*.jpg")
        for image_path in sorted(glob.glob(pattern)):
            name = utils.get_file_name_part(image_path)
            collected.append({
                'x': make_loader(image_path),
                'm': {'filename': name},
            })

    return collected
def load_full_size_feature_images(self, dataset="train"):
    """
    Load full size density map features.

    Feature files are the ``*.png`` files of ``settings.TRAIN_FEATURES_DIR``
    ("train") or ``settings.TEST_FEATURES_DIR`` ("test_st1"), named
    ``<image id>_<feature name>-<feature setting>``.

    The output is:
    [ # A list of dicts, one per original image, sorted by image name
        {
            'features': { # Feature bank; groups feature types together
                <feature name>: {
                    <feature setting>: <function to load feature image>
                }
            },
            'meta': {
                'image_name': <image id>,
                # train only:
                'coordinates': <the entry of self.train_original_coordinates
                                for this image id, or [] when there is none>,
                'counts': <the entry of self.train_original_counts for this
                           image id>
            }
        }
    ]

    :param dataset: "train" or "test_st1".
    :raises ValueError: If ``dataset`` is neither of the supported values.
    """
    if dataset == "train":
        train = True
    elif dataset == "test_st1":
        train = False
    else:
        # Without this check the function failed later on an unbound
        # `features_dir`, which hid the real cause.
        raise ValueError('Unsupported dataset: %r' % (dataset,))

    if train:
        logger.debug("Loading train set full-size feature images")
        features_dir = settings.TRAIN_FEATURES_DIR
    else:
        logger.debug("Loading test set full-size feature images")
        features_dir = settings.TEST_FEATURES_DIR

    images = {}
    for filename in glob.glob(os.path.join(features_dir, "*.png")):
        # <image id>_<feature name>-<feature setting>
        name = utils.get_file_name_part(filename)
        name_parts = name.split('_')
        image_id = name_parts[0]
        feature_parts = name_parts[1].split('-')
        feature_name = feature_parts[0]
        feature_setting = feature_parts[1]

        # NOTE(review): this filter is also applied to the test set, whereas
        # load_original_images only applies it to the train set - confirm
        # that this is intended.
        if image_id in self.train_original_mismatched:
            # Skip images marked as mismatched
            continue

        # Add base image if it does not exist yet
        if image_id not in images:
            if train:
                # The per-image tables are keyed by the image id, not by the
                # full feature file name.
                meta = {
                    'image_name': image_id,
                    'coordinates': self.train_original_coordinates.get(
                        image_id, []),
                    'counts': self.train_original_counts[image_id]
                }
            else:
                meta = {'image_name': image_id}
            images[image_id] = {'features': {}, 'meta': meta}

        # Add the feature, creating its group on first use
        group = images[image_id]['features'].setdefault(feature_name, {})
        group[feature_setting] = (
            lambda filename: lambda: self.load(filename))(filename)

    # Turn into list
    return sorted(images.values(), key=lambda img: img['meta']['image_name'])
def load_density_map_feature_crops(self):
    """
    Load density map feature patches.

    The ``.png`` entries of ``settings.DENSITY_MAP_FEATURE_CROPS_DIR`` are
    named
    ``<image id>_<crop x>-<crop y>-<crop width>-<crop height>_<feature name>-<feature setting>``.
    The four crop numbers are doubled before they are used. Patches of images
    listed in ``self.train_original_mismatched`` are left out.

    The output is:
    [ # A list of dicts, one per unique image patch
        {
            'features': { # Feature bank; groups feature types together
                <feature name>: {
                    <feature setting>: <function to load feature image>
                }
            },
            'meta': {
                'image_name': <image id>,
                'patch': { # Patch coordinates (doubled file name values)
                    'x': <left x coordinate>,
                    'y': <top y coordinate>,
                    'width': <width>,
                    'height': <height>
                },
                'coordinates': [ # Sea lions within 150 px of the patch,
                                 # with coordinates relative to the patch
                    {
                        'x_coord': <x coordinate>,
                        'y_coord': <y coordinate>,
                        'category': <sea lion type>
                    }
                ]
            }
        }
    ]
    """
    logger.debug('Loading density map features')

    def make_loader(path):
        # Bind the path now; the image itself is only loaded on demand.
        return lambda: self.load(path)

    margin = 150
    crops_dir = settings.DENSITY_MAP_FEATURE_CROPS_DIR
    png_names = sorted(
        entry for entry in os.listdir(crops_dir) if entry.endswith(".png"))

    patches = {}
    feature_total = 0
    for png_name in png_names:
        stem = utils.get_file_name_part(png_name)
        tokens = stem.split('_')
        image_id = tokens[0]
        box_tokens = tokens[1].split('-')
        feature_tokens = tokens[2].split('-')

        doubled = [2 * int(box_tokens[position]) for position in range(4)]
        bounding_box = dict(zip(('x', 'y', 'width', 'height'), doubled))

        feature_name = feature_tokens[0]
        feature_setting = feature_tokens[1]
        key = str((image_id, bounding_box.values()))

        if image_id in self.train_original_mismatched:
            # Image is marked as mismatched
            continue
        feature_total += 1

        if key not in patches:
            # Sea lions of the original full-size image
            if image_id in self.train_original_coordinates:
                originals = self.train_original_coordinates[image_id]
            else:
                originals = []

            left = bounding_box['x']
            top = bounding_box['y']
            x_min = left - margin
            x_max = left + bounding_box['width'] + margin
            y_min = top - margin
            y_max = top + bounding_box['height'] + margin

            located = (
                (int(float(entry['x_coord'])),
                 int(float(entry['y_coord'])),
                 entry)
                for entry in originals)
            # Keep the sea lions within (or close to) the patch and move
            # their coordinates to the patch coordinate base
            nearby = [
                {
                    'x_coord': x - left,
                    'y_coord': y - top,
                    'category': entry['category']
                }
                for x, y, entry in located
                if x_min <= x < x_max and y_min <= y < y_max
            ]

            patches[key] = {
                'features': {},
                'meta': {
                    'image_name': image_id,
                    'patch': bounding_box,
                    'coordinates': nearby
                }
            }

        bank = patches[key]['features']
        if feature_name not in bank:
            bank[feature_name] = {}
        bank[feature_name][feature_setting] = make_loader(
            os.path.join(crops_dir, png_name))

    result = list(patches.values())
    logger.debug('Loaded %s features for %s images' % (feature_total,
                                                       len(result)))
    return result
def _load_heatmap_crop_images(self):
    """
    Loads the heatmap crops and generates the object density maps (odm).

    Every ``*.jpg`` in ``settings.TRAIN_HEATMAP_DIR`` is a crop whose file
    name carries its count (integer before ``clions``), its coordinates
    (second ``_``-separated token without its first two characters, read as
    ``<column>-<row>``) and the id of its original image (text between the
    first ``in`` and the next ``_``). Crops with a count of 0 are randomly
    dropped (3 times out of 5 on average).

    For each kept crop a 400x400 map is filled with one mark per sea lion
    that ``cropping.RegionCropper.is_inside`` reports to be inside the crop
    (pups are skipped), resized to 80x80, divided by its maximum when that is
    positive, and given a trailing channel axis.

    The coordinate rows held in ``self.train_original_coordinates`` are not
    modified.

    :return: A list of dicts with the keys 'x' (zero-argument callable that
             calls ``self.load`` on the crop path), 'm' (meta dict) and 'y'
             (the object density map).
    """
    import cropping

    odm_original_size = 400
    odm_target_size = 80
    skip_pups = True

    # Build the type of marks for each type of sealion
    marks = {
        'adult_males': utils.get_gaussian_mark(3.),
        'subadult_males': utils.get_gaussian_mark(3.),
        'juveniles': utils.get_gaussian_mark(2.5),
        'pups': utils.get_gaussian_mark(0.7),
        'adult_females': utils.get_gaussian_mark(2.5)
    }

    images = []
    filepaths = glob.glob(os.path.join(settings.TRAIN_HEATMAP_DIR, '*.jpg'))
    total = len(filepaths)
    logger.info("Generating object density maps of size " +
                str(odm_target_size) + " for " + str(total) + " crops...")
    logger.warning("Skip_pups set to " + str(skip_pups))

    i = 0
    # Iterate over all the crops
    for filepath in filepaths:
        filename = utils.get_file_name_part(filepath)
        meta = {
            'filepath': filepath,
            'filename': filename,
            'count': int(filename.split('clions')[0]),
            'coordinates': filename.split('_')[1][2:],
            'id': filename.split('in')[1].split('_')[0],
        }
        if meta['count'] == 0 and random.choice([0, 0, 1, 1, 1]):
            # We skip 60% of the negatives
            total -= 1
            continue

        # Top-left corner of the crop; the same for every sea lion of the crop
        crop_parts = meta['coordinates'].split('-')
        crop_ix = {
            'row': float(crop_parts[1]),
            'column': float(crop_parts[0])
        }

        # Initialize the object density map matrix
        odm = np.zeros((odm_original_size, odm_original_size))

        # Fill the odm with marks where the sealions are. An image without
        # any coordinate rows simply contributes no marks.
        for sealion in self.train_original_coordinates.get(meta['id'], []):
            if sealion['category'] == 'pups' and skip_pups:
                continue
            # Work on a copy so the shared coordinate rows are left untouched.
            located = dict(sealion,
                           row=float(sealion['y_coord']),
                           column=float(sealion['x_coord']))
            if not cropping.RegionCropper.is_inside(
                    None, located, crop_ix, odm_original_size):
                continue

            # Position of the sea lion relative to the crop
            row = int(located['row'] - crop_ix['row'])
            column = int(located['column'] - crop_ix['column'])

            mark = marks[sealion['category']]
            radius = round(mark.shape[0] / 2.)
            # Cut the mark where it would stick out of the map
            effective_mark = mark[
                max(0, radius - row):radius + odm_original_size - row,
                max(0, radius - column):radius + odm_original_size - column]
            odm[max(0, row - radius):row + radius,
                max(0, column - radius):column + radius] += effective_mark

        # Resize to match the desired input shape of the network
        odm = scipy.misc.imresize(odm, (odm_target_size, odm_target_size))
        if odm.max() > 0:
            odm = odm / odm.max()
        # Add one dimension for the single channel
        odm = np.expand_dims(odm, axis=2)

        images.append({
            'x': (lambda filepath: lambda: self.load(filepath))(
                meta['filepath']),
            'm': meta,
            'y': odm
        })
        if i % 1000 == 0:
            logger.info(str(100 * i / total)[:5] + str("% completed"))
        i += 1
    return images
def generate_obms(impaths):
    """
    Predict a stitched map for every full-size image in ``impaths``.

    A ``network.TransferLearningSeaLionHeatmap`` is created, built with the
    'xception' architecture and loaded with a fixed weights file. For each
    image that does not yet have ``<settings.OBMS_OUTPUT_DIR>/<name>_obm.npy``:

    * the image is divided by its maximum and copied into a zero-padded
      canvas;
    * two views of the canvas are taken, the second one shifted by half a
      crop in both directions;
    * each view is cut into ``crop_size`` tiles; a tile is predicted with the
      model unless ``utils.get_blacked_out_perc`` of it exceeds 0.85, in
      which case zeros are used;
    * the two tiled maps are placed on a common canvas, shifted against each
      other by half of ``cnn_output_shape``, and averaged;
    * an overlay image is written to ``image_samples/heatmaps/`` and the
      averaged map is stored with ``np.save``.

    :param impaths: Mutable sequence of image paths. It is shuffled in place.
    """
    data_type = 'original_test'
    #data_type = 'region_crops'
    prediction_class_type = 'odm'
    #prediction_class_type = 'single'
    validate = False
    class_balancing = False
    input_shape = (224, 224, 3)
    batch_size = 1
    crop_size = 400
    arch = 'xception'
    input_weights_name = 'xception-lay106-heatmap_crops-ep011-tloss0.0068-vloss0.0067.hdf5'
    tl = network.TransferLearningSeaLionHeatmap(
        data_type=data_type,
        input_shape=input_shape,
        prediction_class_type=prediction_class_type,
        class_balancing=class_balancing,
        mini_batch_size=batch_size,
        validate=validate)
    #tl = network.TransferLearningSeaLionOrNoSeaLion(data_type = data_type, input_shape = input_shape, prediction_class_type = prediction_class_type, class_balancing= class_balancing, mini_batch_size=mini_batch_size, validate = validate)
    tl.build(arch, input_shape=input_shape)
    tl.load_weights(input_weights_name)
    # Output shape of the last layer without its first and last entries
    cnn_output_shape = tl.model.layers[-1].output_shape[1:-1]
    # Shuffles the caller's sequence in place
    np.random.shuffle(impaths)
    #impaths = []
    #fixes = [1196, 1652, 7664]
    #for i in fixes:
    #    impaths.append('/vol/tensusers/vgarciacazorla/MLP/noaa-sea-lion-count/data/test_st1/original/'+str(i)+'.jpg')
    for impath in impaths:
        meta = {'filename': utils.get_file_name_part(impath)}
        # Skip images whose result already exists (np.save below appends
        # '.npy' to the name it is given).
        # NOTE(review): the outer os.path.join has a single argument and is
        # redundant.
        if os.path.isfile(
                os.path.join(
                    os.path.join(settings.OBMS_OUTPUT_DIR,
                                 meta['filename'] + '_obm.npy'))):
            print("skipping")
            continue
        test_image_original = scipy.misc.imread(impath)
        t0 = time.time()
        #test_image_original, meta = iterator.__next__()
        #test_image_original = test_image_original[0]
        #meta = meta[0]
        # NOTE(review): not guarded against an all-zero image (max == 0),
        # unlike the per-crop normalisation further down.
        test_image_original = test_image_original / test_image_original.max()
        # Canvas size: the image size rounded down to a multiple of crop_size,
        # plus one and a half crops.
        aux_height = test_image_original.shape[
            0] - test_image_original.shape[0] % crop_size + int(
                1.5 * crop_size)
        aux_width = test_image_original.shape[
            1] - test_image_original.shape[1] % crop_size + int(
                1.5 * crop_size)
        padded = np.zeros((aux_height, aux_width, 3))
        padded[:test_image_original.shape[0], :test_image_original.
               shape[1], :] = test_image_original
        # First view: the canvas without its last half crop.
        test_image_1 = padded[:aux_height - int(0.5 * crop_size), :aux_width -
                              int(0.5 * crop_size)]
        # Second view: the canvas without its first half crop.
        test_image_2 = padded[int(0.5 * crop_size):, int(0.5 * crop_size):]
        #print(test_image.mean())
        obms = []
        # Debug switch: set to a truthy value to display intermediate images
        plot = 0
        nrows = int(test_image_1.shape[0] / crop_size)
        ncolumns = int(test_image_1.shape[1] / crop_size)
        # Number of tiles over both views, used for the progress messages
        total = float((nrows * ncolumns)) * 2
        for wa in [test_image_original, padded, test_image_1, test_image_2]:
            if plot:
                plt.figure()
                plt.imshow(wa)
                plt.show()
        settings.logger.info(
            "Going for image " + str(meta['filename'])
        )  #+" with shape "+str(test_image_original.shape),", padded ",str(padded.shape))
        count = 0.0
        for test_image in [test_image_1, test_image_2]:
            if plot:
                plt.figure()
                plt.imshow(test_image)
                plt.show()
            full_obm = []
            for row in range(nrows):
                row_obms = []
                for column in range(ncolumns):
                    if count % 15 == 0:
                        settings.logger.info(
                            str(100 * count / total)[:4] + "% completed of " +
                            meta['filename'])
                    crop = utils.crop_image(
                        test_image, (column * crop_size, row * crop_size),
                        crop_size)
                    if utils.get_blacked_out_perc(crop) > 0.85:
                        # Mostly blacked-out tile: use zeros instead of
                        # running the model
                        obm = np.zeros(cnn_output_shape)
                    else:
                        crop = scipy.misc.imresize(crop, input_shape)
                        if crop.max() > 0:
                            crop = crop / crop.max()
                        crop = np.expand_dims(crop, axis=0)
                        obm = tl.model.predict(crop)
                        obm = np.squeeze(obm)
                    row_obms.append(obm)
                    count += 1
                # Tiles of one row side by side, rows stacked below each other
                row_obms = np.hstack(row_obms)
                full_obm.append(row_obms)
            full_obm = np.vstack(full_obm)
            #print(full_obm.shape, full_obm.max(),full_obm.mean(),full_obm.min())
            if plot:
                plt.figure()
                plt.imshow(np.squeeze(full_obm), cmap='gray')
                plt.title(str(full_obm.sum()))
                plt.show()
            obms.append(full_obm)
        # Common canvas: one tiled map plus half an output tile per axis
        final_obm_1 = np.zeros(
            (obms[0].shape[0] + int(cnn_output_shape[0] / 2),
             obms[0].shape[1] + int(cnn_output_shape[1] / 2)))
        final_obm_2 = final_obm_1.copy()
        final_obm_1[:obms[0].shape[0], :obms[0].shape[1]] = obms[0]
        # NOTE(review): the column offset also uses cnn_output_shape[0], while
        # the canvas width above uses cnn_output_shape[1]; the two only agree
        # when the output is square - confirm.
        final_obm_2[int(cnn_output_shape[0] / 2):,
                    int(cnn_output_shape[0] / 2):] = obms[1]
        final_obm = (final_obm_2 + final_obm_1) / 2
        full_obm = final_obm
        trunc_img = padded  #[:crop_size*nrows,:crop_size*ncolumns]
        trunc_img = scipy.misc.imresize(
            trunc_img, (full_obm.shape[0], full_obm.shape[1], 3))
        trunc_img = trunc_img / trunc_img.max()
        # The averaged map goes into index 0 of the last axis of the overlay
        red_obm = np.zeros((full_obm.shape[0], full_obm.shape[1], 3))
        red_obm[:, :, 0] = full_obm
        # NOTE(review): unguarded division; an all-zero map gives NaNs here.
        red_obm = red_obm / red_obm.max()
        obms.append(red_obm)
        img_sum = cv2.addWeighted(src1=trunc_img,
                                  alpha=1,
                                  src2=red_obm,
                                  beta=0.6,
                                  gamma=0.001)
        img_sum = img_sum / img_sum.max()
        scipy.misc.imsave(
            'image_samples/heatmaps/' + meta['filename'] + '_obm.jpg',
            img_sum)
        if plot:
            plt.figure()
            plt.imshow(img_sum)
            plt.show()
        np.save(
            os.path.join(settings.OBMS_OUTPUT_DIR,
                         meta['filename'] + '_obm'), final_obm)
        settings.logger.info(meta['filename'] + " completed in " +
                             str(time.time() - t0) + " seconds")
model = get_model() #weights_name = 'obm_regressor-ep001-tloss6.4490-vloss6.3203.hdf5' weights_name = 'obm_regressor-ep016-tloss13.2885-vloss12.0621.hdf5' weights_filepath = os.path.join(settings.WEIGHTS_DIR, weights_name) model.load_weights(weights_filepath) obm_paths = sorted(glob.glob(os.path.join(settings.OBMS_OUTPUT_DIR, '*'))) threshold = 0.3 window_size = 80 import pandas as pd import numpy as np np.random.shuffle(obm_paths) for obm_path in obm_paths: #obm_path = '/vol/tensusers/vgarciacazorla/MLP/noaa-sea-lion-count/output/obms/16435_obm.h5.npy' fname = utils.get_file_name_part(obm_path) if os.path.isfile('image_samples/coords_images/' + fname.split('_')[0] + '_test.jpg'): continue else: pass #print("MISSING ",fname) if os.path.isfile( os.path.join(settings.CNN_COORDINATES_DIR, fname.split('_')[0] + '_coords.csv')): #print("Skippppp",fname) pass print(1) test_image = scipy.misc.imread( os.path.join(settings.TRAIN_DIR, 'original', fname.split('_')[0] + '.jpg')) print(2)