def start_processing(self, process_shard_function, threads=-1, ramp_delay=(1, 10), shard_ids=None):
    """
    Start processing shards in parallel using a multiprocessing Pool.

    process_shard_function: must support parameters 'shard_group' and 'shard_id'
    threads: number of pool workers. If negative, all available cores are used
    ramp_delay: random time in seconds to wait between launches in format (min, max) seconds.
                It is forwarded untouched to self.process_shard for every shard
    shard_ids: shard_ids to be processed. If None, shards 1..self.nr_shards are processed.
               The given sequence is not modified
    """
    if threads < 0:
        threads = multiprocessing.cpu_count()
    logger.info('Using ' + str(threads) + ' parallel tasks')

    if shard_ids is None:
        shard_ids = range(1, self.nr_shards + 1)
    # shuffle a private copy so the caller's list keeps its order
    shard_order = list(shard_ids)
    shuffle(shard_order)

    tasks = [(sid, process_shard_function, ramp_delay) for sid in shard_order]
    with Pool(threads) as p:
        p.starmap(self.process_shard, tasks)
def evaluate_dataset_tflearn(X, Y, model, batch_size=24, detailed=True, class_labels=None):
    """
    Evaluate a model on (X, Y) and optionally log a detailed report.

    X, Y: samples and their binarized labels, handed as-is to model.evaluate
    model: object exposing evaluate(X, Y, batch_size=...) and
           predict(X, batch_size=..., verbose=...)
    batch_size: batch size forwarded to evaluate and predict
    detailed: when truthy, also logs the Kappa score and the confusion matrix and plots the matrix
    class_labels: accepted but not used by this function
    """
    score = model.evaluate(X, Y, batch_size=batch_size)
    logger.info('Loss: ' + str(score))
    if not detailed:
        return

    predictions = model.predict(X, batch_size=batch_size, verbose=1)
    # keep the first column of the column-reversed prediction matrix
    # (described by the original author as "the highest probability guess")
    predictions = np.flip(predictions, 1)[:, 0]

    # convert from categorical to label; the binarizer is fitted on classes 0..4
    binarizer = preprocessing.LabelBinarizer()
    binarizer.fit(np.array(range(5)))
    labels = binarizer.inverse_transform(Y)

    logger.info('Nr test samples: ' + str(len(X)))
    kappa = metrics.cohen_kappa_score(labels, predictions)
    logger.info('\nKappa score (was this luck?): ' + str(kappa) + '\n')

    matrix = metrics.confusion_matrix(labels, predictions)
    logger.info('Confusion matrix:')
    logger.info(matrix)
    utils.plot_confusion_matrix(matrix)
def prepare_model_dirs(output_dir):
    """
    Build the TensorFlow output paths under output_dir and prepare the output dir.

    output_dir: base path; the sub-directory names are appended by plain string
                concatenation, so it is expected to end with a path separator
    returns: tuple (dir_tflogs, dir_checkpoints, dir_checkpoint_best)
    """
    suffixes = ('tf-logs', 'tf-checkpoint', 'tf-checkpoint-best')
    paths = tuple(output_dir + suffix for suffix in suffixes)

    logger.info('Preparing output dir')
    # only 'tf-logs' is requested from utils.mkdirs here
    utils.mkdirs(output_dir, dirs=['tf-logs'], recreate=False)
    return paths
def evaluate_dataset(dataset_path, model):
    """
    Evaluate a model against the 'X' and 'Y' datasets of an HDF5 file and log the result.

    dataset_path: path of the HDF5 file, opened read-only
    model: object exposing evaluate(X, Y, batch_size=...)
    """
    with h5py.File(dataset_path, 'r') as h5_file:
        samples = h5_file['X']
        targets = h5_file['Y']
        logger.debug('X_test shape ' + str(samples.shape))
        logger.debug('Y_test shape ' + str(targets.shape))

        logger.info('Evaluate performance on dataset '+ dataset_path +'...')
        # the dataset handles are passed straight to the model, so the file must stay open here
        result = model.evaluate(samples, targets, batch_size=12)
        logger.info('Accuracy: ' + str(result))
def call_os_command(command):
    """
    Call OS command, wait for it to finish, log its stdout and check its exit status.

    :param command: array of strings (program followed by its arguments)
    :return: none
    """
    child = subprocess.Popen(command, stdout=subprocess.PIPE)
    # communicate() blocks until the child exits, so returncode is populated afterwards
    streamdata = child.communicate()[0]
    data = streamdata.decode('utf-8')
    if data:
        logger.info(data)
    # the status is checked unconditionally: a command can fail without writing to stdout
    status_check(child.returncode)
def shard_items(self, shard_id):
    """
    Select some items for the specified shard_id.
    Returned items will be different from one shard to another.

    An item belongs to the shard when the SHA-224 digest of str(item), taken
    modulo self.nr_shards, equals shard_id - 1.

    shard_id: 1-N shard number
    returns: list of items for this shard, in a deterministic order derived from self.random_seed
    """
    selected = []
    for item in self.items:
        digest = hashlib.sha224(str(item).encode('utf-8')).hexdigest()
        if int(digest, 16) % self.nr_shards == (shard_id - 1):
            selected.append(item)
    logger.info('found {} items for shard {}'.format(len(selected), shard_id))

    # random.shuffle(x, random) lost its second argument in Python 3.11.
    # This loop is what that call executed with a constant random() value,
    # so the resulting order is unchanged on every Python version.
    for i in reversed(range(1, len(selected))):
        j = int(self.random_seed * (i + 1))
        selected[i], selected[j] = selected[j], selected[i]
    return selected
def pyramid_generator(image, scale=0.5, max_layers=-1):
    """
    Yield successive rescaled versions of an image.

    image: object with a 'shape' attribute; layers after the first are produced by
           transform.rescale(image, scale)
    scale: factor applied between consecutive layers
    max_layers: maximum number of layers; -1 means up to 99999 layers
    yields: (image, accumulated_scale) tuples, starting with the untouched image at scale 1.
            The generator ends as soon as either of the first two dimensions equals 1.
    """
    layer_limit = 99999 if max_layers == -1 else max_layers
    accumulated_scale = 1
    for level in range(layer_limit):
        if level > 0:
            print(str(image.shape) + ' ' + str(scale))
            image = transform.rescale(image, scale)
            print(str(image.shape))
            accumulated_scale = accumulated_scale * scale

        if image.shape[0] == 1 or image.shape[1] == 1:
            return

        logger.info('pyramid layer=' + str(level) + ' image=' + str(image.shape) + ' scale=' + str(accumulated_scale))
        yield image, accumulated_scale
def predict_patient(input_dir, patient_id, image_dims, model, output_dir):
    """
    Predict the model output for one patient, caching the pre-processed image in HDF5.

    input_dir: directory prefix; the patient images are read from input_dir + patient_id
    patient_id: patient identifier, also used as the key inside the cache file
    image_dims: image dimensions handed to the pre-processing and used to name the cache file
    model: object exposing predict(batch)
    output_dir: directory used by utils.dataset_path to locate the 'cache-predict' file
    returns: the value returned by model.predict for a batch holding this single patient,
             or None when no lung image could be prepared for the patient
    """
    logger.info('>>> Predict patient_id ' + patient_id)

    logger.info('Loading pre-processed images for patient')
    # patient pre-processed image cache
    dataset_file = utils.dataset_path(output_dir, 'cache-predict', image_dims)
    patient_pixels = None
    with h5py.File(dataset_file, 'a') as h5f:
        try:
            patient_pixels = h5f[patient_id]
            logger.debug('Patient image found in cache. Using it.')
            # disconnect from HDF5: copy to memory so the data outlives the file handle
            patient_pixels = np.array(patient_pixels)

        except KeyError:
            logger.debug('Patient image not found in cache')
            t = Timer('Preparing patient scan image volume. patient_id=' + patient_id)
            patient_pixels = lungprepare.process_patient_images(input_dir + patient_id, image_dims)
            if patient_pixels is None:
                # really skip: there is nothing to cache and nothing to predict on
                logger.warning('Patient lung not found. Skipping.')
                return None
            logger.debug('Storing patient image in cache')
            h5f[patient_id] = patient_pixels
            t.stop()

    t = Timer('Predicting result on CNN (forward)')
    # the model receives a batch of one, hence the extra leading axis
    y = model.predict(np.expand_dims(patient_pixels, axis=0))
    logger.info('PATIENT '+ patient_id +' PREDICT=' + str(y))
    utils.show_slices(patient_pixels, patient_id)
    t.stop()

    return y
def main():
    """
    Main program control, here we have entries for each command subset of quality gate.
    The token file contains the credentials for the runner -> terraform cloud authentication

    The optional terratest stage is selected by the INPUT_TERRATEST environment variable
    ('AWS', any case, or 'terraform_cloud', any case). The process is terminated through
    sys.exit: 0 when every stage was called, 1 on an invalid terratest configuration.
    :return: none
    """
    stage = 'Terraform Format Check (terraform fmt)'
    logger.info('Calling {0}'.format(stage))
    call_os_command(['terraform', '-v'])
    call_os_command(['terraform', 'fmt', '-check'])

    stage = 'Terraform Static Analysis (tflint)'
    logger.info('Calling {0}'.format(stage))
    call_os_command(['tflint', '-v'])
    call_os_command(['tflint'])

    provider = os.environ.get('INPUT_TERRATEST')
    if provider is not None:
        if provider.upper() == 'AWS':
            # copy the action inputs to the variable names the AWS tooling reads;
            # assigning None to os.environ would raise an opaque TypeError, so fail explicitly
            for name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
                value = os.environ.get('INPUT_' + name)
                if value is None:
                    logger.error('Terratest on AWS enabled but INPUT_' + name +
                                 ' is not set. Please consult README.md')
                    sys.exit(1)
                os.environ[name] = value
            terratest()
        elif provider.lower() == 'terraform_cloud':
            terraform_cloud_setup()
            terratest()
        else:
            logger.error('Terratest enabled but no valid cloud provider selected. Please consult README.md')
            sys.exit(1)

    logger.info('Terraform Quality Gate finished successfully!')
    sys.exit(0)
def prepare_cnn_model(network, output_dir, model_file=None):
    """
    Return the module-wide CNN model, creating it on first use.

    network: network definition handed to tflearn.models.dnn.DNN
    output_dir: base directory passed to prepare_model_dirs for logs and checkpoints
    model_file: optional file of a previous training to load into a newly created model
    returns: the shared model stored in the module-level _model

    When a model already exists it is returned as-is: network, output_dir and
    model_file are ignored in that case.
    """
    global _model

    if _model is not None:
        logger.info('CNN model already loaded. Reusing it.')
        return _model

    logger.info('Prepare CNN')
    dir_tflogs, dir_checkpoints, dir_checkpoint_best = prepare_model_dirs(output_dir)

    logger.info('Initializing network...')
    _model = tflearn.models.dnn.DNN(network, tensorboard_verbose=3,
                                    tensorboard_dir=dir_tflogs,
                                    checkpoint_path=dir_checkpoints,
                                    best_checkpoint_path=dir_checkpoint_best)

    if model_file is not None:
        logger.info('Load previous training...')
        _model.load(model_file)

    return _model
def evaluate_dataset(dataset_path, model, batch_size=12, confusion_matrix=False, nr_items=-1):
    """
    Evaluate a model against the 'X' and 'Y' datasets of an HDF5 file and log the result.

    dataset_path: path of the HDF5 file, opened read-only
    model: object exposing evaluate(X, Y, batch_size=...) and predict(X)
    batch_size: batch size forwarded to model.evaluate
    confusion_matrix: when truthy, also predicts on X and prints the confusion matrix
    nr_items: number of leading items to evaluate; a negative value (default) or None
              means the whole dataset
    """
    # the previous slice [-1:nr_items] was empty for the default nr_items=-1
    limit = None if (nr_items is None or nr_items < 0) else nr_items
    with h5py.File(dataset_path, 'r') as hdf5:
        # slicing reads the selected rows into memory
        X = hdf5['X'][:limit]
        Y = hdf5['Y'][:limit]
        logger.debug('X_test shape ' + str(X.shape))
        logger.debug('Y_test shape ' + str(Y.shape))

        logger.info('Evaluate performance on dataset '+ dataset_path +'...')
        acc = model.evaluate(X, Y, batch_size=batch_size)
        logger.info('Accuracy: ' + str(acc))

        if confusion_matrix:
            logger.info('Confusion matrix')
            Y_pred = model.predict(X)
            # NOTE(review): sklearn's confusion_matrix expects label vectors; if Y / Y_pred
            # are one-hot or per-class scores they need converting first - confirm with callers
            print(sklearn.metrics.confusion_matrix(Y, Y_pred))
def evaluate_dataset_keras(xy_generator, nr_batches, nr_samples, model, detailed=True, class_labels=None):
    """
    Evaluate a Keras model fed by a generator and optionally run a detailed analysis.

    xy_generator: generator of (x, y) batches, consumed by evaluate_generator and again
                  (wrapped in a YAcumGenerator) by predict_generator
    nr_batches: number of batches to evaluate; nr_batches + 1 are requested for prediction
    nr_samples: number of samples to keep from the predictions and the accumulated Y
    model: object exposing evaluate_generator and predict_generator
    detailed: when truthy, predicts Y and calls utils.evaluate_predictions
    class_labels: forwarded to utils.evaluate_predictions
    """
    logger.info('Evaluating model performance (' + str(nr_samples) + ' samples)...')
    acc = model.evaluate_generator(xy_generator, nr_batches)
    logger.info('Accuracy: ' + str(acc[1]) + ' - Loss: ' + str(acc[0]))

    if detailed:
        logger.info('Predicting Y for detailed analysis...')
        acum = YAcumGenerator()
        Y_pred = model.predict_generator(acum.generator(xy_generator), nr_batches + 1)
        # sometimes predict_generator returns more samples than nr_batches*batch_size.
        # A plain slice keeps the first nr_samples rows; wrapping np.split() pieces of
        # different lengths in np.array() is rejected by current NumPy.
        Y_pred = np.asarray(Y_pred)[:nr_samples]
        # we only need the highest probability guess
        Y_pred = np.argmax(Y_pred, axis=1)
        Y = np.asarray(acum.y_ds)[:nr_samples]
        if len(Y) > 0:
            # convert from categorical to label; the class count is the width of one Y row
            lb = preprocessing.LabelBinarizer()
            lb.fit(np.array(range(np.shape(Y[0])[0])))
            Y = lb.inverse_transform(Y)
            utils.evaluate_predictions(Y, Y_pred, detailed=detailed, class_labels=class_labels)
        else:
            logger.info('No samples found in xy_generator')
def __init__(self, source_xy_generator, image_augmentation=None, max_augmentation_ratio=3, max_undersampling_ratio=1, output_weight=1, enforce_max_ratios=False, start_ratio=0, end_ratio=1, batch_size=64, tmp_file=None, change_y=None):
    """
    Analyse the class distribution of the source and compute a per-class output ratio.

    source_xy_generator: object exposing flow() and size; its Y values are read once
    image_augmentation: stored on the instance as self.image_augmentation
    max_augmentation_ratio: the smallest non-empty class may grow to
                            smallest + smallest * max_augmentation_ratio samples
    max_undersampling_ratio: the largest class may shrink to
                             largest - largest * max_undersampling_ratio samples
    output_weight: factor applied to every class ratio at the end
    enforce_max_ratios: when truthy, each class ratio is clamped to
                        [1 - max_undersampling_ratio, 1 + max_augmentation_ratio]
    start_ratio, end_ratio, batch_size: forwarded to self.setup_flow
    tmp_file: optional '.npy' file used to cache the Y array between runs
              ('.npy' is appended when missing)
    change_y: when not None, Y is transformed with change_classes(Y, change_y)
    """
    self.source_xy_generator = source_xy_generator
    self.Y_labels = None
    self.change_y = change_y

    logger.info('loading input data for class distribution analysis...')
    Y_onehot = None
    if tmp_file is not None:
        if not tmp_file.endswith('.npy'):
            tmp_file = tmp_file + '.npy'
        if os.path.exists(tmp_file):
            logger.info('loading Y from temporary file ' + tmp_file)
            try:
                Y_onehot = np.load(tmp_file)
            except Exception:
                # best effort: an unreadable cache only means Y is rebuilt from the source
                logger.warning('error loading temp file. ignoring. e=' + str(sys.exc_info()[0]))

    # identity check: '== None' on a loaded ndarray is element-wise and cannot be used in 'if'
    if Y_onehot is None:
        logger.info('loading Y from raw dataset')
        _, Y_onehot = dump_xy_to_array(source_xy_generator.flow(), source_xy_generator.size, x=False, y=True)
        if tmp_file is not None:
            logger.info('saving Y to temp file ' + tmp_file)
            np.save(tmp_file, Y_onehot)

    if self.change_y is not None:
        Y_onehot = change_classes(Y_onehot, self.change_y)

    self.Y_labels = onehot_to_label(Y_onehot)
    self.count_classes = class_distribution(Y_onehot)
    self.nr_classes = np.shape(self.count_classes)[0]
    self.image_augmentation = image_augmentation

    # sizes of the smallest non-empty class and of the largest class
    smallest_qtty = 999999999
    largest_qtty = 0
    logger.info('raw sample class distribution')
    for i, c in enumerate(self.count_classes):
        logger.info(str(i) + ': ' + str(c))
        if c > 0 and c < smallest_qtty:
            smallest_qtty = c
        if c > largest_qtty:
            largest_qtty = c

    # target size per class: the larger of "largest class after maximum undersampling"
    # and "smallest class after maximum augmentation"
    minq = largest_qtty - largest_qtty * max_undersampling_ratio
    maxq = smallest_qtty + smallest_qtty * max_augmentation_ratio
    qtty_per_class = max(minq, maxq)
    logger.info('overall output samples per class: ' + str(qtty_per_class))

    logger.info('augmentation/undersampling ratio per class')
    self.ratio_classes = np.zeros(len(self.count_classes))
    for i, c in enumerate(self.count_classes):
        if c == 0:
            self.ratio_classes[i] = 0
        else:
            self.ratio_classes[i] = qtty_per_class / c
        if enforce_max_ratios:
            if self.ratio_classes[i] < 1:
                self.ratio_classes[i] = max((1 - max_undersampling_ratio), self.ratio_classes[i])
            elif self.ratio_classes[i] > 1:
                self.ratio_classes[i] = min(1 + max_augmentation_ratio, self.ratio_classes[i])

    self.ratio_classes = output_weight * self.ratio_classes
    self.setup_flow(start_ratio, end_ratio, batch_size=batch_size)
def terratest():
    """
    Announce the terratest stage and run 'go test -v ./tests' through call_os_command.

    :return: none
    """
    stage_name = 'AWS Terraform Integration Testing (terratest)'
    logger.info('Calling {0}'.format(stage_name))
    go_test_command = ['go', 'test', '-v', './tests']
    call_os_command(go_test_command)
def start(self):
    """Store the current time in self._start; log a 'started' line when self._debug is set."""
    self._start = time()
    if not self._debug:
        return
    logger.info('> [started] ' + self._name + '...')
def stop(self):
    """
    Store the seconds elapsed since self._start in self._lastElapsed.

    When self._debug is set, a 'done' line with the elapsed time in milliseconds is logged.
    """
    now = time()
    self._lastElapsed = now - self._start
    if not self._debug:
        return
    elapsed_ms = self._lastElapsed * 1000
    logger.info('> [done] {} ({:.3f} ms)'.format(self._name, elapsed_ms))
def show_images(image_list, image_labels=None, group_by_label=False, cols=4, name='image', output_dir=None, is_bgr=False, cmap=None, size=6):
    """
    Draw a grid of images with matplotlib and either show it or save it as a JPEG.

    image_list: sequence of images (arrays accepted by imshow)
    image_labels: optional sequence with one label per image; list or array
    group_by_label: when truthy and labels are given, images are drawn ordered by label
                    (labels are converted to int for the ordering)
    cols: number of grid columns
    name: file name (without extension) used when output_dir is given
    output_dir: when not None the figure is saved to output_dir + name + '.jpg' and closed,
                otherwise it is shown
    is_bgr: when truthy each image is converted from BGR to RGB before drawing
    cmap: colormap forwarded to imshow; when None the image is cast to uint8 first
    size: size in inches of each grid cell
    """
    logger.info('showing ' + str(len(image_list)) + ' images')
    fig = plt.figure()
    rows = int(len(image_list) / cols) + 1
    t = Timer('generating image patches. rows=' + str(rows) + '; cols=' + str(cols))
    fig.set_size_inches(cols * size, rows * size)
    image_indexes = range(len(image_list))
    # identity check so that array-like labels do not trigger an element-wise comparison
    has_labels = image_labels is not None

    # order indexes by label
    if group_by_label and has_labels:
        index_label_map = list(enumerate(image_labels))
        label_image_map = np.array(index_label_map, dtype=[('index', int), ('label', int)])
        label_image_map = np.sort(label_image_map, order='label')
        image_indexes = [a[0] for a in label_image_map]

    for c, i in enumerate(image_indexes):
        im = image_list[i]
        if is_bgr:
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        y = fig.add_subplot(rows, cols, c + 1)
        if cmap is None:
            im = im.astype('uint8')
        y.imshow(im, cmap=cmap)
        if has_labels:
            # derive the box color from the label text so equal labels share one color
            seed = int(
                int(hashlib.md5(str(image_labels[i]).encode('utf-8')).hexdigest(), 16)
                / 999999999999999999999999999999)
            np.random.seed(seed)
            color = np.random.rand(3, 1)
            y.text(4, 17, str(image_labels[i]), fontsize=16, style='normal',
                   bbox={'facecolor': color, 'alpha': 1, 'pad': 4})
            # index of the image inside image_list
            y.text(4, np.shape(im)[1] - 7, '[' + str(i) + ']', fontsize=12, style='normal')

    if output_dir is not None:
        f = output_dir + name + '.jpg'
        plt.savefig(f)
        plt.close(fig)
    else:
        plt.show()

    t.stop()
def evaluate_predictions(Y_true, Y_pred, detailed=True, class_labels=None):
    """
    Log accuracy and, optionally, a detailed report comparing predictions with the truth.

    Y_true: true labels
    Y_pred: predicted labels
    detailed: when truthy, also logs Kappa score, classification report, per-class ratio
              and confusion matrix, and plots the confusion matrix
    class_labels: display names of the classes; when None they are the string form of the
                  unique values of Y_true. Classes are addressed as 0..len(class_labels)-1
    """
    acc = metrics.accuracy_score(Y_true, Y_pred)
    logger.info('Accuracy: ' + str(acc))

    if not detailed:
        return

    if class_labels is None:
        class_labels = [str(s) for s in np.unique(Y_true)]
    label_ids = list(range(len(class_labels)))

    # 'labels' is passed by keyword: current scikit-learn releases only accept it that way
    cm = metrics.confusion_matrix(Y_true, Y_pred, labels=label_ids)

    logger.info('Number of test samples: ' + str(len(Y_true)))
    logger.info('Kappa score: ' + str(metrics.cohen_kappa_score(Y_true, Y_pred)) + ' (-1 bad; 0 just luck; 1 great)')
    logger.info('\n' + metrics.classification_report(Y_true, Y_pred, target_names=class_labels, labels=label_ids))

    # diagonal divided by the column sums of the confusion matrix
    # NOTE(review): with rows as true labels this is a per-class precision - confirm the intent
    acc_class = cm.diagonal() / np.sum(cm, axis=0)
    logger.info('Accuracy per class:')
    for i, class_acc in enumerate(acc_class):
        logger.info('{}: {:.1f}%'.format(class_labels[i], class_acc * 100))

    logger.info('Confusion matrix:')
    logger.info('\n' + str(cm))
    plot_confusion_matrix(cm, class_labels=class_labels, size=2)
def flow(self, max_samples=None, output_dtype='uint8'):
    """
    Generate class-balanced (x_batch, y_batch) batches from the source generator.

    Every source sample is handled according to the ratio of its class
    (self.ratio_classes): ratio 1 passes it through, ratio < 1 accepts it with that
    probability, ratio > 1 passes it through and adds augmented copies produced by
    self.image_augmentation.

    max_samples: when not None, source samples are skipped once count_samples reaches it
                 (see the NOTE on count_samples below)
    output_dtype: dtype of the empty arrays each batch starts from
    yields: (x_batch, y_batch) every time the batch reaches self.batch_size samples.
            A trailing, incomplete batch is not yielded.
    """
    logger.info('starting new flow...')
    if np.sum(self.ratio_classes) == 0:
        # PEP 479: raising StopIteration inside a generator surfaces as RuntimeError,
        # so the "nothing to iterate" case is ended with a plain return
        logger.warning('no item will be returned by this iterator. aborting')
        return

    x_batch = np.array([], dtype=output_dtype)
    y_batch = np.array([], dtype=output_dtype)

    # float accumulator: an unsigned integer array would drop the fractional part
    # of (r - 1) on every addition, so fractional ratios would never add up
    pending_augmentations = np.zeros(self.nr_classes, dtype='float64')

    # NOTE(review): count_samples is never incremented, so max_samples only has an
    # effect when it is <= 0 - decide whether it should count source or output samples
    count_samples = 0

    # process each source batch
    for xs, ys in self.source_xy_generator.flow():
        if self.change_y is not None:
            ys = change_classes(ys, self.change_y)
        y_labels = onehot_to_label(ys)

        for i, x in enumerate(xs):
            y = ys[i]
            if max_samples is not None and count_samples >= max_samples:
                break

            label = y_labels[i]
            r = self.ratio_classes[label]

            # samples this source item contributes to the output, in emission order
            selected = []

            if r == 1:
                selected.append((x, y))

            elif r < 1:
                # undersample: accept the sample at the rate needed to balance classes
                if random.random() <= r:
                    selected.append((x, y))

            elif r > 1:
                # augmentation: keep the sample and book (r - 1) copies for its class
                selected.append((x, y))
                pending_augmentations[label] += (r - 1)

                # generate augmented copies of images so we balance classes
                if pending_augmentations[label] > 1:
                    # the augmentation runs on an RGB copy; results are converted back to BGR
                    x1 = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
                    x_orig = np.array([x1])
                    y_orig = np.array([y])
                    ir = self.image_augmentation.flow(x_orig, y_orig, batch_size=1)
                    while pending_augmentations[label] > 1:
                        it = ir.next()
                        x_it = cv2.cvtColor(it[0][0], cv2.COLOR_RGB2BGR)
                        y_it = it[1][0]
                        selected.append((x_it, y_it))
                        pending_augmentations[label] -= 1

            for sample_x, sample_y in selected:
                x_batch, y_batch = self._add_to_batch(x_batch, y_batch, sample_x, sample_y)
                if len(y_batch) >= self.batch_size:
                    yield x_batch, y_batch
                    x_batch = np.array([], dtype=output_dtype)
                    y_batch = np.array([], dtype=output_dtype)
def setup_flow(self, output_start_ratio, output_end_ratio, batch_size=64):
    """
    Restrict the flow to a slice of the balanced output and map it to a source range.

    output_start_ratio, output_end_ratio: start and end of the desired slice, expressed as
        fractions of the total balanced output size
    batch_size: stored in self.batch_size and used to compute self.nr_batches

    Sets self.size, self.nr_batches and self.batch_size. When the source generator has a
    'setup_flow' attribute, it is called with the computed source start/end positions.
    Raises Exception when the start ratio is greater than the end ratio.
    """
    if output_end_ratio < output_start_ratio:
        raise Exception('output_start_ratio: start must be before end!')

    logger.info('SETUP FLOW {} {}'.format(output_start_ratio, output_end_ratio))

    # expected number of output samples per class after balancing
    class_totals = [
        np.floor(self.count_classes[idx] * ratio)
        for idx, ratio in enumerate(self.ratio_classes)
    ]
    output_total_size = sum(class_totals)

    logger.info(
        'calculating source range according to start/end range of the desired output..'
    )
    output_start_pos = int(np.ceil(output_total_size * output_start_ratio))
    output_end_pos = int(np.floor(output_total_size * output_end_ratio))

    self.size = output_end_pos - output_start_pos
    self.nr_batches = int(np.ceil(self.size / batch_size))
    self.batch_size = batch_size

    logger.info('output distribution for this flow')
    ratio_span = output_end_ratio - output_start_ratio
    for idx, ratio in enumerate(self.ratio_classes):
        logger.info('{}: {} ({:.2f})'.format(
            idx, int(class_totals[idx] * ratio_span), ratio))

    # walk the source labels, simulating how far each sample advances the output
    source_start_pos = None
    source_end_pos = None
    output_pos = 0
    for idx, y_label in enumerate(self.Y_labels):
        r = self.ratio_classes[y_label]
        if r > 1:
            output_pos += r
        elif r < 1:
            if random.random() <= r:
                output_pos += 1
        elif r == 1:
            output_pos += 1

        if source_start_pos is None and output_pos >= output_start_pos:
            source_start_pos = idx
        if source_start_pos is not None and output_pos <= output_end_pos:
            source_end_pos = idx

    source_span = source_end_pos - source_start_pos
    logger.info('source range: ' + str(source_start_pos) + '-' +
                str(source_end_pos) + ' (' + str(source_span) + ')')
    output_span = output_end_pos - output_start_pos
    logger.info('output range: ' + str(output_start_pos) + '-' +
                str(output_end_pos) + ' (' + str(output_span) + ')')

    if 'setup_flow' in dir(self.source_xy_generator):
        self.source_xy_generator.setup_flow(source_start_pos, source_end_pos)
def terraform_cloud_setup():
    """
    Hand the Terraform Cloud token from the environment to write_token.

    The token is read from the TERRAFORM_CLOUD_TOKEN environment variable (KeyError when
    it is not set) and the target is the literal path "~/.terraform.d/credentials.tfrc.json".
    NOTE(review): the '~' is passed on unexpanded - confirm that write_token expands it.
    :return: none
    """
    logger.info('Writing auth token')
    cloud_token = os.environ['TERRAFORM_CLOUD_TOKEN']
    write_token("~/.terraform.d/credentials.tfrc.json", cloud_token)
def validate_xy_dataset(dataset_file, save_dir=None):
    """
    Validate the 'X' and 'Y' datasets of an HDF5 file and log a summary.

    Checks performed: X and Y have the same length, the dataset is not empty, no X entry
    is all zeros, and every Y entry has exactly one of its first two positions set.

    dataset_file: path of the HDF5 file, opened read-only
    save_dir: optional directory handed to mkdirs and to show_slices as output_dir for
              the recorded sample images (up to 3, evenly spread over the dataset)
    returns: True when no validation error was found, False otherwise
    """
    ok = True
    logger.info('VALIDATING DATASET ' + dataset_file)

    with h5py.File(dataset_file, 'r') as h5f:
        x_ds = h5f['X']
        y_ds = h5f['Y']
        nr_x = len(x_ds)
        nr_y = len(y_ds)

        if nr_x != nr_y:
            logger.warning('VALIDATION ERROR: x and y datasets with different lengths')
            ok = False

        if nr_x == 0 or nr_y == 0:
            # reported as a validation error instead of failing later on a division by zero
            logger.warning('VALIDATION ERROR: dataset is empty')
            ok = False

        for px in range(nr_x):
            arr = np.array(x_ds[px])
            if not np.any(arr):
                logger.warning('VALIDATION ERROR: Image not found at index=' + str(px))
                ok = False

        # running sum of the label vectors, used for the class balance summary
        label_total = np.array([[0, 0]])
        for py in range(nr_y):
            arr = np.array(y_ds[py])
            label_total = arr + label_total
            if not np.any(arr) or np.all(arr) or arr[0] == arr[1]:
                logger.warning('VALIDATION ERROR: Invalid label found at index=' + str(py) + ' label=' + str(arr))
                ok = False

        label0_ratio = label_total[0][0] / nr_y if nr_y else 0
        label1_ratio = label_total[0][1] / nr_y if nr_y else 0

        logger.info('Summary')
        logger.info('X shape=' + str(x_ds.shape))
        logger.info('Y shape=' + str(y_ds.shape))
        logger.info('Y: total: ' + str(nr_y))
        logger.info('Y: label 0: ' + str(label_total[0][0]) + ' ' + str(100 * label0_ratio) + '%')
        logger.info('Y: label 1: ' + str(label_total[0][1]) + ' ' + str(100 * label1_ratio) + '%')

        logger.info('Recording sample data')
        # only indexes present in both datasets can be sampled
        size = min(nr_x, nr_y)
        qtty = min(3, size)
        f = size / qtty if qtty else 0
        for i in range(qtty):
            pi = round(i * f)
            logger.info('image_index ' + str(pi))
            logger.info('x=')
            if save_dir is not None:
                mkdirs(save_dir)
            show_slices(x_ds[pi], str(i) + str(y_ds[pi]), output_dir=save_dir)
            logger.info('y=' + str(y_ds[pi]))

    return ok
def export_lions(image_raw, image_dotted, target_x_ds, target_y_ds, image_dims, debug=False, min_distance_others=50, non_lion_distance=150, export_non_lion=True):
    """
    Extract sea lion (and optionally non sea lion) patches from a raw/dotted image pair.

    image_raw: BGR image without marks
    image_dotted: BGR copy of the same image with the human-made marks
    target_x_ds, target_y_ds: datasets receiving the patches and their one-hot labels
                              through utils.add_sample_to_dataset; nothing is stored when
                              either is None
    image_dims: patch dimensions; image_dims[1] is used as the side of the cropped patch
    debug: when truthy, detections are drawn on a copy of image_dotted and shown at the end
    min_distance_others: a sea lion is exported only when utils.is_far_from_others accepts it
                         for this distance; values <= 0 disable the filter
    non_lion_distance: distance used the same way for the random non sea lion patches
    export_non_lion: when truthy, random patches away from every sea lion are exported
                     as class 5
    returns: (count_class, count_class_added, lion_positions, lion_classes)
    """
    NR_CLASSES = 6

    # BLACKOUT PORTIONS OF IMAGE IN RAW PICTURE
    # regions that are (near) black in the dotted picture are masked out of the raw picture
    image_dotted_bw = cv2.cvtColor(image_dotted, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(image_dotted_bw, 5, 255, cv2.THRESH_BINARY)[1]
    image_raw = cv2.bitwise_and(image_raw, image_raw, mask=mask)

    # ISOLATE HUMAN MARKS ON DOTTED PICTURE
    diff_color = cv2.absdiff(image_dotted, image_raw)
    diff = cv2.cvtColor(diff_color, cv2.COLOR_BGR2GRAY)
    kernel = np.ones((2, 2), np.uint8)
    diff = cv2.morphologyEx(diff, cv2.MORPH_OPEN, kernel)
    _, diff = cv2.threshold(diff, 10, 255, cv2.THRESH_TOZERO)
    _, diff = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY)

    # debug data
    debug_image = image_dotted.copy()
    images = []

    # find all dotted sea lions
    count1 = 0
    count_class = np.zeros(NR_CLASSES)
    lion_positions = []
    lion_classes = []
    # findContours returns (image, contours, hierarchy) on OpenCV 3 and
    # (contours, hierarchy) on OpenCV 4; the contours are second to last in both
    contours = cv2.findContours(diff, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w > 4 and h > 4:
            count1 = count1 + 1
            center = (x + round(w / 3), y + round(h / 3))
            clazz = find_class(image_dotted, center)
            if clazz == -1:
                logger.warning('could not detect sea lion class at ' + str(center))
                continue
            lion_positions.append(center)
            count_class[clazz] = count_class[clazz] + 1
            lion_classes.append(clazz)
            if debug:
                cv2.circle(debug_image, center, round(w / 2), (255, 0, 0), 1)

    count_class_added = np.zeros(NR_CLASSES)
    store_samples = target_x_ds is not None and target_y_ds is not None

    # add found sea lions to training dataset
    # filter out lions that are too near each other to minimize noise on training set
    count2 = 0
    for i, lion_pos in enumerate(lion_positions):
        lion_class = lion_classes[i]

        is_far = True
        if min_distance_others > 0:
            is_far = utils.is_far_from_others(lion_pos, lion_positions, min_distance_others)

        if is_far:
            # export patch to train dataset
            count2 = count2 + 1
            pw = round(image_dims[1] / 2)
            ph = image_dims[1] - pw
            trainX = utils.crop_image_fill(
                image_raw, (lion_pos[1] - pw, lion_pos[0] - pw),
                (lion_pos[1] + ph, lion_pos[0] + ph))

            # patches that are mostly dark, mostly bright or exactly mid-gray are discarded
            m = np.mean(trainX)
            if m > 30 and m < 225 and m != 127:
                if debug:
                    images.append(trainX)
                    # NOTE(review): w is the width of the last contour processed above
                    cv2.circle(debug_image, lion_pos, round(w / 2), (0, 0, 255), 2)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(debug_image, str(lion_class), lion_pos, font, 1.1,
                                (255, 255, 255), 2, cv2.LINE_AA)

                trainY = keras.utils.np_utils.to_categorical([lion_class], NR_CLASSES)[0]
                if store_samples:
                    utils.add_sample_to_dataset(target_x_ds, target_y_ds, trainX, trainY)
                count_class_added[lion_class] = count_class_added[lion_class] + 1

    # identify non sea lion patches
    count3 = 0
    if export_non_lion:
        s = np.shape(image_raw)
        for i in range(int(count2 * 1.1)):
            # random position, as (x, y), kept two patch sizes away from the image borders
            patch_pos = (random.randint(image_dims[1] * 2, s[1] - image_dims[1] * 2),
                         random.randint(image_dims[0] * 2, s[0] - image_dims[0] * 2))
            is_far = utils.is_far_from_others(patch_pos, lion_positions, non_lion_distance)

            if is_far:
                # export patch to train dataset
                pw = round(image_dims[1] / 2)
                ph = image_dims[1] - pw
                trainX = utils.crop_image_fill(
                    image_raw, (patch_pos[1] - pw, patch_pos[0] - pw),
                    (patch_pos[1] + ph, patch_pos[0] + ph))

                m = np.mean(trainX)
                if m > 50 and m < 200:
                    count3 = count3 + 1
                    if debug:
                        images.append(trainX)
                        cv2.circle(debug_image, patch_pos, round(w / 2), (0, 255, 0), 3)

                    trainY = keras.utils.np_utils.to_categorical([5], NR_CLASSES)[0]
                    if store_samples:
                        utils.add_sample_to_dataset(target_x_ds, target_y_ds, trainX, trainY)
                    count_class[5] = count_class[5] + 1
                    count_class_added[5] = count_class_added[5] + 1

    logger.info('sea lions found: ' + str(count1))
    logger.info('sea lions added to dataset: ' + str(count2))
    logger.info('non sea lions added to dataset: ' + str(count3))
    if store_samples:
        logger.info('dataset size: ' + str(len(target_x_ds)))

    if debug:
        utils.show_image(debug_image, size=40, is_bgr=True)
        utils.show_images(images, cols=10, is_bgr=True, size=1.5)

    return count_class, count_class_added, lion_positions, lion_classes