Ejemplo n.º 1
0
    def start_processing(self,
                         process_shard_function,
                         threads=-1,
                         ramp_delay=(1, 10),
                         shard_ids=None):
        """
            Process shards in parallel by mapping self.process_shard over a worker Pool.

            process_shard_function: forwarded to self.process_shard for every shard;
                must support parameters 'shard_group' and 'shard_id'
            threads: number of parallel workers. Any negative value uses all available cores
            ramp_delay: (min, max) seconds, forwarded to self.process_shard
                (presumably a random wait between worker launches - confirm in process_shard)
            shard_ids: shard_ids to be processed. If None, all shards (1..self.nr_shards)
                will be processed. The caller's sequence is never modified.
        """
        if threads < 0:
            threads = multiprocessing.cpu_count()
        logger.info('Using ' + str(threads) + ' parallel tasks')

        if shard_ids is None:
            shard_ids = range(1, self.nr_shards + 1)
        # Shuffle a private copy: shuffling in place would reorder the caller's list
        # (and would fail outright for tuples/ranges).
        shard_order = list(shard_ids)
        shuffle(shard_order)

        with Pool(threads) as p:
            p.starmap(self.process_shard,
                      [(sid, process_shard_function, ramp_delay)
                       for sid in shard_order])
Ejemplo n.º 2
0
def evaluate_dataset_tflearn(X,
                             Y,
                             model,
                             batch_size=24,
                             detailed=True,
                             class_labels=None):
    """Evaluate ``model`` on (X, Y), log the result and optionally a detailed report.

    The value returned by ``model.evaluate`` is logged under the heading 'Loss'.
    When ``detailed`` is truthy, predictions are computed, the sample count, the
    Cohen kappa score and the confusion matrix are logged, and the matrix is
    plotted through ``utils.plot_confusion_matrix``.

    ``class_labels`` is accepted but not used by this function.
    Returns None.
    """
    evaluation = model.evaluate(X, Y, batch_size=batch_size)
    logger.info('Loss: {!s}'.format(evaluation))

    if not detailed:
        return

    predictions = model.predict(X, batch_size=batch_size, verbose=1)

    # keep column 0 of the column-reversed prediction matrix
    # NOTE(review): intended as "highest probability guess"; this only holds if
    # model.predict returns per-sample labels sorted by ascending probability - confirm.
    top_guess = np.flip(predictions, 1)[:, 0]

    # convert Y from categorical to label using a binarizer fitted on labels 0..4
    binarizer = preprocessing.LabelBinarizer()
    binarizer.fit(np.array(range(5)))
    true_labels = binarizer.inverse_transform(Y)

    logger.info('Nr test samples: {!s}'.format(len(X)))

    kappa = metrics.cohen_kappa_score(true_labels, top_guess)
    logger.info('\nKappa score (was this luck?): {!s}\n'.format(kappa))

    matrix = metrics.confusion_matrix(true_labels, top_guess)
    logger.info('Confusion matrix:')
    logger.info(matrix)

    utils.plot_confusion_matrix(matrix)
Ejemplo n.º 3
0
def prepare_model_dirs(output_dir):
    """Derive the TensorFlow log/checkpoint paths under ``output_dir``.

    The paths are built by plain string concatenation, so ``output_dir`` is
    expected to already end with a path separator. Only the 'tf-logs' entry is
    requested from ``utils.mkdirs``.

    Returns a tuple (dir_tflogs, dir_checkpoints, dir_checkpoint_best).
    """
    dir_tflogs, dir_checkpoints, dir_checkpoint_best = [
        output_dir + suffix
        for suffix in ('tf-logs', 'tf-checkpoint', 'tf-checkpoint-best')
    ]

    logger.info('Preparing output dir')
    utils.mkdirs(output_dir, dirs=['tf-logs'], recreate=False)

    return dir_tflogs, dir_checkpoints, dir_checkpoint_best
Ejemplo n.º 4
0
def evaluate_dataset(dataset_path, model):
    """Evaluate ``model`` on the 'X' and 'Y' datasets of an HDF5 file.

    The file is opened read-only, the dataset shapes are logged at debug level
    and the value returned by ``model.evaluate`` (batch size 12) is logged
    under the heading 'Accuracy'. Returns None.
    """
    with h5py.File(dataset_path, 'r') as hdf5:
        X, Y = hdf5['X'], hdf5['Y']
        logger.debug('X_test shape {!s}'.format(X.shape))
        logger.debug('Y_test shape {!s}'.format(Y.shape))

        logger.info(''.join(
            ('Evaluate performance on dataset ', dataset_path, '...')))
        result = model.evaluate(X, Y, batch_size=12)
        logger.info('Accuracy: {!s}'.format(result))
def call_os_command(command):
    """
    Call OS command, wait for it to finish, log its stdout, and check its exit status.

    The exit status is always handed to status_check, including when the command
    printed nothing on stdout (failures frequently report only on stderr, which is
    not captured here and goes straight to the parent's stderr).
    :param command: array of strings (program followed by its arguments)
    :return: none
    """
    child = subprocess.Popen(command, stdout=subprocess.PIPE)
    streamdata = child.communicate()[0]
    # errors='replace': a stray non-UTF-8 byte in tool output must not abort the gate
    data = streamdata.decode('utf-8', errors='replace')
    if data:
        logger.info(data)
    status_check(child.returncode)
Ejemplo n.º 6
0
    def shard_items(self, shard_id):
        """
            Select the items that belong to the specified shard_id. Items are assigned to
            shards by the SHA-224 hash of their string form modulo self.nr_shards, so
            returned items are different from one shard to another.
            shard_id: 1-N shard number
            returns: list of items for this shard, reordered deterministically
                     according to self.random_seed
        """
        target_bucket = shard_id - 1
        selected = []
        for item in self.items:
            digest = hashlib.sha224(str(item).encode('utf-8')).hexdigest()
            if int(digest, 16) % self.nr_shards == target_bucket:
                selected.append(item)
        logger.info('found {} items for shard {}'.format(
            len(selected), shard_id))

        # random.shuffle(x, random_fn) was removed in Python 3.11. Reproduce the legacy
        # algorithm with the constant "random" value self.random_seed so the resulting
        # order is identical to what older interpreters produced.
        for i in reversed(range(1, len(selected))):
            j = int(self.random_seed * (i + 1))
            selected[i], selected[j] = selected[j], selected[i]
        return selected
Ejemplo n.º 7
0
def pyramid_generator(image, scale=0.5, max_layers=-1):
    """Yield successively rescaled versions of ``image`` (an image pyramid).

    The first layer is the unmodified image with scale 1; each following layer
    is the previous one passed through ``transform.rescale(image, scale)``.

    image: array exposing ``.shape`` with at least two dimensions
    scale: factor applied between consecutive layers
    max_layers: maximum number of layers to yield; -1 means a practical
        "no limit" (capped at 99999 layers)
    yields: (image, current_scale) where current_scale is the cumulative
        scale relative to the input image

    Generation stops before yielding a layer whose first or second dimension
    has collapsed to one pixel or less.
    """
    layer_limit = 99999 if max_layers == -1 else max_layers
    current_scale = 1
    for layer in range(layer_limit):
        if layer > 0:
            logger.debug('rescaling image=' + str(image.shape) + ' by ' +
                         str(scale))
            image = transform.rescale(image, scale)
            logger.debug('rescaled image=' + str(image.shape))
            current_scale = current_scale * scale
            # a degenerate layer is useless for further processing: stop here
            if image.shape[0] <= 1 or image.shape[1] <= 1:
                return
        logger.info('pyramid layer=' + str(layer) + ' image=' +
                    str(image.shape) + ' scale=' + str(current_scale))
        yield image, current_scale
Ejemplo n.º 8
0
def predict_patient(input_dir, patient_id, image_dims, model, output_dir):
    """Predict the model output for one patient, caching the prepared image volume.

    The pre-processed volume is looked up by ``patient_id`` in the HDF5 cache file
    given by ``utils.dataset_path(output_dir, 'cache-predict', image_dims)``. On a
    cache miss it is produced by ``lungprepare.process_patient_images`` from
    ``input_dir + patient_id`` and stored in the cache.

    input_dir: directory prefix of the patient folders (concatenated with patient_id)
    patient_id: patient identifier (string); also the cache key
    image_dims: target dimensions forwarded to the preparation step
    model: object exposing ``predict``
    output_dir: directory holding the prediction cache
    returns: the value of ``model.predict`` for the patient, or None when the
        patient's lung could not be found (nothing is cached in that case)
    """
    logger.info('>>> Predict patient_id ' + patient_id)
    logger.info('Loading pre-processed images for patient')

    #patient pre-processed image cache
    dataset_file = utils.dataset_path(output_dir, 'cache-predict', image_dims)
    with h5py.File(dataset_file, 'a') as h5f:
        try:
            cached = h5f[patient_id]
        except KeyError:
            cached = None

        if cached is not None:
            logger.debug('Patient image found in cache. Using it.')
            #copy into memory so the data stays usable after the file is closed
            patient_pixels = np.array(cached)
        else:
            logger.debug('Patient image not found in cache')
            t = Timer('Preparing patient scan image volume. patient_id=' + patient_id)
            try:
                patient_pixels = lungprepare.process_patient_images(input_dir + patient_id, image_dims)
                if patient_pixels is None:
                    #actually skip: None can neither be stored in HDF5 nor predicted on
                    logger.warning('Patient lung not found. Skipping.')
                    return None
                logger.debug('Storing patient image in cache')
                h5f[patient_id] = patient_pixels
            finally:
                t.stop()

    t = Timer('Predicting result on CNN (forward)')
    y = model.predict(np.expand_dims(patient_pixels, axis=0))
    logger.info('PATIENT '+ patient_id +' PREDICT=' + str(y))
    utils.show_slices(patient_pixels, patient_id)
    t.stop()

    return y
def main():
    """
    Main program control, here we have entries for each command subset of quality gate.

    Runs the terraform format check and tflint, then - when the INPUT_TERRATEST
    environment variable is set - the terratest integration tests for the selected
    provider ('AWS' or 'terraform_cloud', case-insensitive).
    For AWS the credentials are copied from INPUT_AWS_ACCESS_KEY_ID /
    INPUT_AWS_SECRET_ACCESS_KEY into the standard AWS environment variables.
    The token file contains the credentials for the runner -> terraform cloud authentication.
    Exits the process with status 0 on success and 1 on invalid configuration.
    :return: none
    """
    stage = 'Terraform Format Check (terraform fmt)'
    logger.info('Calling {0}'.format(stage))
    call_os_command(['terraform', '-v'])
    call_os_command(['terraform', 'fmt', '-check'])

    stage = 'Terraform Static Analysis (tflint)'
    logger.info('Calling {0}'.format(stage))
    call_os_command(['tflint', '-v'])
    call_os_command(['tflint'])

    terratest_target = os.environ.get('INPUT_TERRATEST')
    if terratest_target is not None:
        if terratest_target.upper() == 'AWS':
            for name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
                value = os.environ.get('INPUT_' + name)
                if value is None:
                    # os.environ rejects None with an opaque TypeError; fail clearly instead
                    logger.error('Terratest on AWS enabled but INPUT_' + name +
                                 ' is not set. Please consult README.md')
                    sys.exit(1)
                os.environ[name] = value
            terratest()
        elif terratest_target.lower() == 'terraform_cloud':
            terraform_cloud_setup()
            terratest()
        else:
            logger.error('Terratest enabled but no valid cloud provider selected. Please consult README.md')
            sys.exit(1)

    logger.info('Terraform Quality Gate finished successfully!')
    sys.exit(0)
Ejemplo n.º 10
0
def prepare_cnn_model(network, output_dir, model_file=None):
    """Return the module-wide CNN model, building it on first use.

    On the first call a ``tflearn.models.dnn.DNN`` is created for ``network`` with
    the log/checkpoint directories from ``prepare_model_dirs(output_dir)`` and,
    when ``model_file`` is given, previously trained weights are loaded from it.
    The instance is kept in the module global ``_model``.

    On later calls the cached instance is returned as is: ``network``,
    ``output_dir`` and ``model_file`` are ignored.
    """
    global _model

    # identity check: never rely on a model object's own __eq__ to detect "not built yet"
    if _model is None:
        logger.info('Prepare CNN')
        dir_tflogs, dir_checkpoints, dir_checkpoint_best = prepare_model_dirs(output_dir)

        logger.info('Initializing network...')
        _model = tflearn.models.dnn.DNN(network, tensorboard_verbose=3,
                                         tensorboard_dir=dir_tflogs,
                                         checkpoint_path=dir_checkpoints,
                                         best_checkpoint_path=dir_checkpoint_best)

        if model_file is not None:
            logger.info('Load previous training...')
            _model.load(model_file)

    else:
        logger.info('CNN model already loaded. Reusing it.')

    return _model
Ejemplo n.º 11
0
def evaluate_dataset(dataset_path, model, batch_size=12, confusion_matrix=False, nr_items=-1):
    """Evaluate ``model`` on the 'X'/'Y' datasets of an HDF5 file and log the result.

    dataset_path: path of the HDF5 file (opened read-only)
    model: object exposing ``evaluate`` and ``predict``
    batch_size: batch size forwarded to ``model.evaluate``
    confusion_matrix: when truthy, also print the confusion matrix of Y against
        ``model.predict(X)``
    nr_items: number of leading samples to evaluate; any negative value (the
        default) evaluates the whole dataset
    returns: None
    """
    # The previous slice [-1:nr_items] was empty for the default nr_items=-1.
    # A stop of None selects everything; a non-negative stop keeps the first nr_items.
    stop = nr_items if nr_items >= 0 else None
    with h5py.File(dataset_path, 'r') as hdf5:
        X = hdf5['X'][:stop]
        Y = hdf5['Y'][:stop]
        logger.debug('X_test shape ' + str(X.shape))
        logger.debug('Y_test shape ' + str(Y.shape))

        logger.info('Evaluate performance on dataset '+ dataset_path +'...')
        acc = model.evaluate(X, Y, batch_size=batch_size)
        logger.info('Accuracy: ' + str(acc))

        if confusion_matrix:
            logger.info('Confusion matrix')
            Y_pred = model.predict(X)
            # NOTE(review): sklearn expects label vectors here; if Y / Y_pred are
            # one-hot or probability matrices they need an argmax first - confirm.
            print(sklearn.metrics.confusion_matrix(Y, Y_pred))
Ejemplo n.º 12
0
def evaluate_dataset_keras(xy_generator,
                           nr_batches,
                           nr_samples,
                           model,
                           detailed=True,
                           class_labels=None):
    """Evaluate a Keras model fed by a generator and optionally log a detailed report.

    xy_generator: generator of (x, y) batches
    nr_batches: number of batches to draw for ``evaluate_generator``
    nr_samples: number of samples to keep for the detailed analysis
    model: object exposing ``evaluate_generator`` and ``predict_generator``
    detailed: when truthy, predict every sample and delegate the report to
        ``utils.evaluate_predictions``
    class_labels: forwarded to ``utils.evaluate_predictions``
    returns: None
    """
    logger.info('Evaluating model performance (' + str(nr_samples) +
                ' samples)...')
    acc = model.evaluate_generator(xy_generator, nr_batches)
    logger.info('Accuracy: ' + str(acc[1]) + ' - Loss: ' + str(acc[0]))

    if not detailed:
        return

    logger.info('Predicting Y for detailed analysis...')
    # the accumulating wrapper records the Y values it passes along (read back as y_ds)
    acum = YAcumGenerator()

    Y_pred = model.predict_generator(acum.generator(xy_generator),
                                     nr_batches + 1)
    #sometimes predict_generator returns more samples than nr_batches*batch_size
    # Plain slicing keeps the first nr_samples rows; wrapping np.split(...) in
    # np.array builds a ragged array, which raises on NumPy >= 1.24.
    Y_pred = np.asarray(Y_pred)[:nr_samples]

    #we only need the highest probability guess
    Y_pred = np.argmax(Y_pred, axis=1)

    Y = np.asarray(acum.y_ds)[:nr_samples]
    if len(Y) > 0:
        #convert from categorical to label (one class per one-hot column)
        lb = preprocessing.LabelBinarizer()
        lb.fit(np.array(range(np.shape(Y[0])[0])))
        Y = lb.inverse_transform(Y)
        utils.evaluate_predictions(Y,
                                   Y_pred,
                                   detailed=detailed,
                                   class_labels=class_labels)
    else:
        logger.info('No samples found in xy_generator')
Ejemplo n.º 13
0
    def __init__(self,
                 source_xy_generator,
                 image_augmentation=None,
                 max_augmentation_ratio=3,
                 max_undersampling_ratio=1,
                 output_weight=1,
                 enforce_max_ratios=False,
                 start_ratio=0,
                 end_ratio=1,
                 batch_size=64,
                 tmp_file=None,
                 change_y=None):
        """
            Analyse the class distribution of the source and compute, per class, the
            augmentation/undersampling ratio used to balance the output.

            source_xy_generator: source of samples; must expose flow() and size
            image_augmentation: stored for later use when generating augmented copies
            max_augmentation_ratio: upper target is smallest_class * (1 + ratio)
            max_undersampling_ratio: lower target is largest_class * (1 - ratio)
            output_weight: multiplier applied to every class ratio
            enforce_max_ratios: if True, clamp each class ratio to
                [1 - max_undersampling_ratio, 1 + max_augmentation_ratio]
            start_ratio, end_ratio, batch_size: forwarded to setup_flow
            tmp_file: optional .npy file used to cache the one-hot Y array
                ('.npy' is appended when missing)
            change_y: optional argument forwarded to change_classes to remap Y
        """
        self.source_xy_generator = source_xy_generator
        self.Y_labels = None
        self.change_y = change_y

        logger.info('loading input data for class distribution analysis...')

        Y_onehot = None

        if tmp_file is not None:
            if not tmp_file.endswith('.npy'):
                tmp_file = tmp_file + '.npy'
            if os.path.exists(tmp_file):
                logger.info('loading Y from temporary file ' + tmp_file)
                try:
                    Y_onehot = np.load(tmp_file)
                except Exception:
                    # the cache is best effort: fall back to the raw dataset below
                    logger.warning('error loading temp file. ignoring. e=' +
                                   str(sys.exc_info()[0]))

        # identity check: "== None" on a loaded ndarray compares elementwise and
        # makes the "if" raise (ambiguous truth value)
        if Y_onehot is None:
            logger.info('loading Y from raw dataset')
            _, Y_onehot = dump_xy_to_array(source_xy_generator.flow(),
                                           source_xy_generator.size,
                                           x=False,
                                           y=True)
            if tmp_file is not None:
                logger.info('saving Y to temp file ' + tmp_file)
                np.save(tmp_file, Y_onehot)

        if self.change_y is not None:
            Y_onehot = change_classes(Y_onehot, self.change_y)

        self.Y_labels = onehot_to_label(Y_onehot)
        self.count_classes = class_distribution(Y_onehot)
        self.nr_classes = np.shape(self.count_classes)[0]
        self.image_augmentation = image_augmentation

        # smallest non-empty class and largest class sizes
        smallest_qtty = 999999999
        largest_qtty = 0

        logger.info('raw sample class distribution')
        for i, c in enumerate(self.count_classes):
            logger.info(str(i) + ': ' + str(c))
            if 0 < c < smallest_qtty:
                smallest_qtty = c
            if c > largest_qtty:
                largest_qtty = c

        minq = largest_qtty - largest_qtty * max_undersampling_ratio
        maxq = smallest_qtty + smallest_qtty * max_augmentation_ratio

        qtty_per_class = max(minq, maxq)
        logger.info('overall output samples per class: ' + str(qtty_per_class))

        logger.info('augmentation/undersampling ratio per class')
        self.ratio_classes = np.zeros(len(self.count_classes))
        for i, c in enumerate(self.count_classes):
            # empty classes keep ratio 0: nothing to sample from
            if c == 0:
                self.ratio_classes[i] = 0
            else:
                self.ratio_classes[i] = qtty_per_class / c
            if enforce_max_ratios:
                if self.ratio_classes[i] < 1:
                    self.ratio_classes[i] = max((1 - max_undersampling_ratio),
                                                self.ratio_classes[i])
                elif self.ratio_classes[i] > 1:
                    self.ratio_classes[i] = min(1 + max_augmentation_ratio,
                                                self.ratio_classes[i])

        self.ratio_classes = output_weight * self.ratio_classes
        self.setup_flow(start_ratio, end_ratio, batch_size=batch_size)
def terratest():
    """Log the integration-test stage and run ``go test -v ./tests`` through call_os_command."""
    stage_name = 'AWS Terraform Integration Testing (terratest)'
    go_test_command = ['go', 'test', '-v', './tests']
    logger.info('Calling {0}'.format(stage_name))
    call_os_command(go_test_command)
Ejemplo n.º 15
0
 def start(self):
     """Record the current time as the start instant; log a start message when debug is on."""
     self._start = time()
     if not self._debug:
         return
     logger.info(''.join(('> [started] ', self._name, '...')))
Ejemplo n.º 16
0
 def stop(self):
     """Store the seconds elapsed since start(); log them in milliseconds when debug is on."""
     elapsed_seconds = time() - self._start
     self._lastElapsed = elapsed_seconds
     if not self._debug:
         return
     message = '> [done]    {} ({:.3f} ms)'.format(self._name,
                                                    elapsed_seconds * 1000)
     logger.info(message)
Ejemplo n.º 17
0
def show_images(image_list,
                image_labels=None,
                group_by_label=False,
                cols=4,
                name='image',
                output_dir=None,
                is_bgr=False,
                cmap=None,
                size=6):
    """Draw a list of images as a grid of subplots and show or save the figure.

    image_list: sequence of images (arrays accepted by ``imshow``)
    image_labels: optional sequence of labels, one per image. Each label is drawn
        on its image inside a box whose color is derived from the label's MD5 hash,
        together with the image index.
    group_by_label: when truthy (and labels are given) images are ordered by
        integer label value
    cols: number of grid columns
    name: base name of the output file
    output_dir: when given, the figure is saved to ``output_dir + name + '.jpg'``
        and closed; otherwise it is shown interactively
    is_bgr: when truthy, images are converted from BGR to RGB before drawing
    cmap: colormap forwarded to ``imshow``; when None images are cast to uint8
    size: size in inches of each grid cell
    """
    logger.info('showing ' + str(len(image_list)) + ' images')
    fig = plt.figure()
    rows = int(len(image_list) / cols) + 1
    t = Timer('generating image patches. rows=' + str(rows) + '; cols=' +
              str(cols))
    fig.set_size_inches(cols * size, rows * size)

    # identity check: "!= None" on an ndarray of labels compares elementwise
    has_labels = image_labels is not None
    image_indexes = range(len(image_list))

    #order indexes by label
    if group_by_label and has_labels:
        index_label_map = [(i, label) for i, label in enumerate(image_labels)]
        label_image_map = np.array(index_label_map,
                                   dtype=[('index', int), ('label', int)])
        label_image_map = np.sort(label_image_map, order='label')
        image_indexes = [entry[0] for entry in label_image_map]

    for c, i in enumerate(image_indexes):
        im = image_list[i]
        if is_bgr:
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        y = fig.add_subplot(rows, cols, c + 1)
        if cmap is None:
            im = im.astype('uint8')
        y.imshow(im, cmap=cmap)

        if has_labels:
            # stable per-label color: seed a private generator from the label hash
            # instead of reseeding NumPy's global RNG (which would affect callers)
            seed = int(
                int(
                    hashlib.md5(str(
                        image_labels[i]).encode('utf-8')).hexdigest(), 16) /
                999999999999999999999999999999)
            # flat (r, g, b) tuple: a (3, 1) array is not a valid matplotlib color
            color = tuple(np.random.RandomState(seed).rand(3))
            y.text(4,
                   17,
                   str(image_labels[i]),
                   fontsize=16,
                   style='normal',
                   bbox={
                       'facecolor': color,
                       'alpha': 1,
                       'pad': 4
                   })
            # the y coordinate runs along the image rows (shape[0]), so anchor the
            # index text relative to the image height
            y.text(4,
                   np.shape(im)[0] - 7,
                   '[' + str(i) + ']',
                   fontsize=12,
                   style='normal')

    if output_dir is not None:
        f = output_dir + name + '.jpg'
        plt.savefig(f)
        plt.close(fig)
    else:
        plt.show()

    t.stop()
Ejemplo n.º 18
0
def evaluate_predictions(Y_true, Y_pred, detailed=True, class_labels=None):
    """Log accuracy and, optionally, a detailed classification report.

    Y_true: true labels (integer class ids)
    Y_pred: predicted labels (integer class ids)
    detailed: when truthy, also log the sample count, Cohen kappa score,
        classification report, per-class ratio and confusion matrix, and plot
        the confusion matrix
    class_labels: display names, indexed by class id; defaults to the string
        form of the unique values of Y_true
    returns: None
    """
    acc = metrics.accuracy_score(Y_true, Y_pred)
    logger.info('Accuracy: ' + str(acc))

    if not detailed:
        return

    if class_labels is None:
        class_labels = [str(s) for s in np.unique(Y_true)]

    label_ids = list(range(len(class_labels)))
    # "labels" must be passed by keyword: it is keyword-only in current scikit-learn
    cm = metrics.confusion_matrix(Y_true, Y_pred, labels=label_ids)

    logger.info('Number of test samples: ' + str(len(Y_true)))
    logger.info('Kappa score: ' +
                str(metrics.cohen_kappa_score(Y_true, Y_pred)) +
                ' (-1 bad; 0 just luck; 1 great)')

    logger.info(
        '\n' +
        metrics.classification_report(Y_true,
                                      Y_pred,
                                      target_names=class_labels,
                                      labels=label_ids))

    # NOTE(review): axis=0 sums the columns (predicted counts), so this is the
    # share of correct predictions among samples *predicted* as each class.
    acc_class = cm.diagonal() / np.sum(cm, axis=0)
    logger.info('Accuracy per class:')
    for label_name, class_ratio in zip(class_labels, acc_class):
        logger.info('{}: {:.1f}%'.format(label_name, class_ratio * 100))

    logger.info('Confusion matrix:')
    logger.info('\n' + str(cm))
    plot_confusion_matrix(cm, class_labels=class_labels, size=2)
Ejemplo n.º 19
0
    def flow(self, max_samples=None, output_dtype='uint8'):
        """
            Generate class-balanced (x_batch, y_batch) batches from the source generator.

            Every source sample is kept, dropped or multiplied according to the
            ratio of its class in self.ratio_classes (see _balanced_samples). A batch
            is yielded each time it reaches self.batch_size samples; samples left in
            an incomplete batch when the source is exhausted are not yielded.

            max_samples: when not None, samples of a source batch are skipped once
                count_samples >= max_samples.
                NOTE(review): count_samples is never incremented, so this only has
                an effect for max_samples <= 0 - confirm the intended unit
                (source vs. output samples) before wiring it up.
            output_dtype: dtype of the empty arrays each new batch starts from
        """
        logger.info('starting new flow...')
        if (np.sum(self.ratio_classes) == 0):
            # PEP 479: raising StopIteration inside a generator is turned into a
            # RuntimeError on Python 3.7+. Returning ends the iteration cleanly.
            logger.warning(
                'no item will be returned by this iterator. aborting')
            return

        x_batch = np.array([], dtype=output_dtype)
        y_batch = np.array([], dtype=output_dtype)

        # Float accumulator: with an integer dtype every "+= (ratio - 1)" was
        # truncated, so fractional augmentation ratios were silently lost.
        pending_augmentations = np.zeros(self.nr_classes, dtype='float64')

        #process each source batch
        count_samples = 0
        for xs, ys in self.source_xy_generator.flow():
            if (self.change_y is not None):
                ys = change_classes(ys, self.change_y)

            y_labels = onehot_to_label(ys)

            for i, x in enumerate(xs):
                y = ys[i]

                if (max_samples is not None and count_samples >= max_samples):
                    break

                label = y_labels[i]
                r = self.ratio_classes[label]

                for out_x, out_y in self._balanced_samples(
                        x, y, label, r, pending_augmentations):
                    x_batch, y_batch = self._add_to_batch(
                        x_batch, y_batch, out_x, out_y)
                    if (len(y_batch) >= self.batch_size):
                        yield x_batch, y_batch
                        x_batch = np.array([], dtype=output_dtype)
                        y_batch = np.array([], dtype=output_dtype)

    def _balanced_samples(self, x, y, label, ratio, pending_augmentations):
        """
            Lazily yield the output (x, y) samples produced by one source sample.

            ratio == 1: the sample itself
            ratio < 1: the sample with probability ratio (undersampling)
            ratio > 1: the sample, then augmented copies while the class has more
                than one pending augmentation credit (each sample adds ratio - 1)

            pending_augmentations: per-class credit array, updated in place
        """
        if (ratio == 1):
            yield x, y

        #undersample
        elif (ratio < 1):
            #accept sample at the rate it should so we balance classes
            if (random.random() <= ratio):
                yield x, y

        #augmentation
        elif (ratio > 1):
            #accept sample
            yield x, y

            pending_augmentations[label] += (ratio - 1)

            #generate augmented copies of images so we balance classes
            if (pending_augmentations[label] > 1):
                # the augmenter is fed RGB; samples travel as BGR
                x_rgb = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
                ir = self.image_augmentation.flow(np.array([x_rgb]),
                                                  np.array([y]),
                                                  batch_size=1)
                while (pending_augmentations[label] > 1):
                    it = ir.next()
                    x_it = cv2.cvtColor(it[0][0], cv2.COLOR_RGB2BGR)
                    y_it = it[1][0]
                    yield x_it, y_it
                    pending_augmentations[label] -= 1
Ejemplo n.º 20
0
    def setup_flow(self, output_start_ratio, output_end_ratio, batch_size=64):
        """
            Restrict this flow to a slice of the balanced output and map that slice
            back onto a range of source samples.

            output_start_ratio, output_end_ratio: start/end of the desired slice as
                fractions of the total balanced output (start must not exceed end)
            batch_size: batch size used by flow(); also used to compute nr_batches

            Sets self.size, self.nr_batches and self.batch_size. When the source
            generator exposes setup_flow, it is called with the resolved source range.
            Undersampled classes are simulated with random draws, so the resolved
            source range is an estimate.

            raises: ValueError if start > end, or if the requested output range
                cannot be mapped onto the source samples
        """
        if (output_start_ratio > output_end_ratio):
            raise ValueError('output_start_ratio: start must be before end!')
        logger.info('SETUP FLOW {} {}'.format(output_start_ratio,
                                              output_end_ratio))

        output_total_size = 0
        for i, ratio in enumerate(self.ratio_classes):
            output_total_size += np.floor(self.count_classes[i] * ratio)

        logger.info(
            'calculating source range according to start/end range of the desired output..'
        )
        output_pos = 0
        output_start_pos = int(np.ceil(output_total_size * output_start_ratio))
        output_end_pos = int(np.floor(output_total_size * output_end_ratio))
        self.size = output_end_pos - output_start_pos
        self.nr_batches = int(np.ceil(self.size / batch_size))
        self.batch_size = batch_size

        logger.info('output distribution for this flow')
        for i, ratio in enumerate(self.ratio_classes):
            class_total = np.floor(self.count_classes[i] * ratio)
            logger.info('{}: {} ({:.2f})'.format(
                i, int(class_total * (output_end_ratio - output_start_ratio)),
                ratio))

        source_start_pos = None
        source_end_pos = None

        # walk the source labels, tracking how many output samples they would produce
        for i, y_label in enumerate(self.Y_labels):
            r = self.ratio_classes[y_label]
            if (r == 1):
                output_pos += 1
            elif (r < 1):
                if (random.random() <= r):
                    output_pos += 1
            elif (r > 1):
                output_pos += r

            if (source_start_pos is None and output_pos >= output_start_pos):
                source_start_pos = i

            if (source_start_pos is not None
                    and output_pos <= output_end_pos):
                source_end_pos = i

        if (source_start_pos is None or source_end_pos is None):
            # previously surfaced as an opaque TypeError while formatting the log line
            raise ValueError(
                'could not map output range ' + str(output_start_pos) + '-' +
                str(output_end_pos) + ' onto the source samples')

        logger.info('source range: ' + str(source_start_pos) + '-' +
                    str(source_end_pos) + ' (' +
                    str(source_end_pos - source_start_pos) + ')')
        logger.info('output range: ' + str(output_start_pos) + '-' +
                    str(output_end_pos) + ' (' +
                    str(output_end_pos - output_start_pos) + ')')

        if hasattr(self.source_xy_generator, 'setup_flow'):
            self.source_xy_generator.setup_flow(source_start_pos,
                                                source_end_pos)
Ejemplo n.º 21
0
def terraform_cloud_setup():
    """
    Write the Terraform Cloud API token to the Terraform CLI credentials file.

    The token is read from the TERRAFORM_CLOUD_TOKEN environment variable
    (KeyError if it is not set) and handed to write_token.
    :return: none
    """
    # "~" is only meaningful to a shell; expand it so the file lands in the real
    # home directory (harmless if write_token expands the path again)
    terraform_token_file = os.path.expanduser(
        "~/.terraform.d/credentials.tfrc.json")

    logger.info('Writing auth token')
    write_token(terraform_token_file, os.environ['TERRAFORM_CLOUD_TOKEN'])
Ejemplo n.º 22
0
def validate_xy_dataset(dataset_file, save_dir=None):
    """Validate the 'X'/'Y' datasets of an HDF5 file and log a summary.

    Checks performed (each failure is logged as a warning):
      - X and Y have the same length
      - no X entry is entirely zero
      - every Y entry is a valid two-class one-hot label (not all zero, not all
        set, and its two components differ)

    dataset_file: path of the HDF5 file (opened read-only)
    save_dir: when given, up to three evenly spaced samples are rendered there
        with show_slices
    returns: True when every check passed, False otherwise
    """
    ok = True
    logger.info('VALIDATING DATASET ' + dataset_file)

    with h5py.File(dataset_file, 'r') as h5f:
        x_ds = h5f['X']
        y_ds = h5f['Y']

        if (len(x_ds) != len(y_ds)):
            logger.warning(
                'VALIDATION ERROR: x and y datasets with different lengths')
            ok = False

        # entries are read one at a time so the whole dataset never sits in memory
        for px in range(len(x_ds)):
            arr = np.array(x_ds[px])
            if (not np.any(arr)):
                logger.warning('VALIDATION ERROR: Image not found at index=' +
                               str(px))
                ok = False

        label_total = np.array([[0, 0]])
        for py in range(len(y_ds)):
            arr = np.array(y_ds[py])
            label_total = arr + label_total
            if (not np.any(arr) or np.all(arr) or arr[0] == arr[1]):
                logger.warning(
                    'VALIDATION ERROR: Invalid label found at index=' +
                    str(py) + ' label=' + str(arr))
                ok = False

        # an empty dataset has no meaningful ratios: report 0 instead of dividing by zero
        total_labels = len(y_ds)
        label0_ratio = label_total[0][0] / total_labels if total_labels else 0.0
        label1_ratio = label_total[0][1] / total_labels if total_labels else 0.0

        logger.info('Summary')
        logger.info('X shape=' + str(x_ds.shape))
        logger.info('Y shape=' + str(y_ds.shape))
        logger.info('Y: total: ' + str(total_labels))
        logger.info('Y: label 0: ' + str(label_total[0][0]) + ' ' +
                    str(100 * label0_ratio) + '%')
        logger.info('Y: label 1: ' + str(label_total[0][1]) + ' ' +
                    str(100 * label1_ratio) + '%')

        logger.info('Recording sample data')
        size = len(x_ds)
        qtty = min(3, size)
        # nothing to sample from an empty dataset (and size / qtty would divide by zero)
        if qtty > 0:
            f = size / qtty
            for i in range(qtty):
                pi = round(i * f)
                logger.info('image_index ' + str(pi))
                logger.info('x=')
                if (save_dir is not None):
                    mkdirs(save_dir)
                    show_slices(x_ds[pi],
                                str(i) + str(y_ds[pi]),
                                output_dir=save_dir)
                    logger.info('y=' + str(y_ds[pi]))
    return ok
Ejemplo n.º 23
0
def export_lions(image_raw,
                 image_dotted,
                 target_x_ds,
                 target_y_ds,
                 image_dims,
                 debug=False,
                 min_distance_others=50,
                 non_lion_distance=150,
                 export_non_lion=True):
    """Extract training patches of sea lions (and background) from one image pair.

    The human-drawn dots are isolated by differencing the dotted image against
    the raw image, each dot is classified with find_class, and a patch of side
    image_dims[1] around every sufficiently isolated dot is added to the target
    datasets. Optionally random background patches are added as class 5.

    image_raw: original BGR image
    image_dotted: BGR copy of the image carrying the colored dots (may contain
        blacked-out regions, which are also masked out of the raw image)
    target_x_ds, target_y_ds: datasets receiving patches / one-hot labels through
        utils.add_sample_to_dataset; nothing is stored when either is None
    image_dims: patch dimensions; index 1 is used as the patch side, indexes 0 and
        1 bound the random background positions
    debug: when truthy, draw markers and show the debug image and patches
    min_distance_others: minimum distance to other lions for a lion to be
        exported; values <= 0 disable the filter
    non_lion_distance: minimum distance to any lion for a background patch
    export_non_lion: when truthy, also export background patches
    returns: (count_class, count_class_added, lion_positions, lion_classes)
    """
    NR_CLASSES = 6
    has_target = target_x_ds is not None and target_y_ds is not None

    #BLACKOUT PORTIONS OF IMAGE IN RAW PICTURE
    image_dotted_bw = cv2.cvtColor(image_dotted, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(image_dotted_bw, 5, 255, cv2.THRESH_BINARY)[1]
    image_raw = cv2.bitwise_and(image_raw, image_raw, mask=mask)

    #ISOLATE HUMAN MARKS ON DOTTED PICTURE
    diff_color = cv2.absdiff(image_dotted, image_raw)
    diff = cv2.cvtColor(diff_color, cv2.COLOR_BGR2GRAY)

    kernel = np.ones((2, 2), np.uint8)
    diff = cv2.morphologyEx(diff, cv2.MORPH_OPEN, kernel)
    ret, diff = cv2.threshold(diff, 10, 255, cv2.THRESH_TOZERO)
    ret, diff = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY)

    def crop_patch(center):
        #square patch of side image_dims[1] around center, given as (x, y)
        pw = round(image_dims[1] / 2)
        ph = image_dims[1] - pw
        return utils.crop_image_fill(
            image_raw, (center[1] - pw, center[0] - pw),
            (center[1] + ph, center[0] + ph))

    def store_sample(patch, class_id):
        #one-hot encode the class and append the sample when a target was given
        label = keras.utils.np_utils.to_categorical([class_id],
                                                    NR_CLASSES)[0]
        if has_target:
            utils.add_sample_to_dataset(target_x_ds, target_y_ds, patch,
                                        label)

    #debug data
    debug_image = image_dotted.copy()
    images = []

    #find all dotted sea lions
    count1 = 0
    count_class = np.zeros(NR_CLASSES)
    lion_positions = []
    lion_classes = []
    # findContours returns (image, contours, hierarchy) on OpenCV 3 and
    # (contours, hierarchy) on OpenCV 2/4: contours is always second to last
    contours = cv2.findContours(diff, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    # width of the last contour seen; later debug markers reuse it as their radius
    # (0 when there are no contours, instead of an undefined name)
    last_w = 0
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        last_w = w
        if (w > 4 and h > 4):
            count1 = count1 + 1
            center = (x + round(w / 3), y + round(h / 3))
            clazz = find_class(image_dotted, center)

            if (clazz == -1):
                logger.warning('could not detect sea lion class at ' +
                               str(center))
                continue

            lion_positions.append(center)

            count_class[clazz] = count_class[clazz] + 1
            lion_classes.append(clazz)

            if (debug):
                cv2.circle(debug_image, center, round(w / 2), (255, 0, 0), 1)

    marker_radius = round(last_w / 2)
    count_class_added = np.zeros(NR_CLASSES)

    #add found sea lions to training dataset
    #filter out lions that are too near each other to minimize noise on training set
    count2 = 0
    for lion_pos, lion_class in zip(lion_positions, lion_classes):

        is_far = True
        if (min_distance_others > 0):
            is_far = utils.is_far_from_others(lion_pos, lion_positions,
                                              min_distance_others)

        if (not is_far):
            continue

        #export patch to train dataset
        count2 = count2 + 1
        trainX = crop_patch(lion_pos)

        m = np.mean(trainX)

        # skip patches that are mostly dark, mostly bright, or exactly mid-gray
        # (presumably fill/blacked-out areas - confirm against crop_image_fill)
        if (m > 30 and m < 225 and m != 127):

            if (debug):
                images.append(trainX)
                cv2.circle(debug_image, lion_pos, marker_radius,
                           (0, 0, 255), 2)
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(debug_image, str(lion_class), lion_pos, font,
                            1.1, (255, 255, 255), 2, cv2.LINE_AA)

            store_sample(trainX, lion_class)
            count_class_added[
                lion_class] = count_class_added[lion_class] + 1

    #identify non sea lion patches
    count3 = 0
    if (export_non_lion):
        s = np.shape(image_raw)
        for i in range(int(count2 * 1.1)):
            patch_pos = (random.randint(image_dims[1] * 2,
                                        s[1] - image_dims[1] * 2),
                         random.randint(image_dims[0] * 2,
                                        s[0] - image_dims[0] * 2))
            is_far = utils.is_far_from_others(patch_pos, lion_positions,
                                              non_lion_distance)

            if (not is_far):
                continue

            #export patch to train dataset
            trainX = crop_patch(patch_pos)

            m = np.mean(trainX)

            if (m > 50 and m < 200):
                count3 = count3 + 1
                if (debug):
                    images.append(trainX)
                    cv2.circle(debug_image, patch_pos, marker_radius,
                               (0, 255, 0), 3)

                # class 5 is the "not a sea lion" class
                store_sample(trainX, 5)
                count_class[5] = count_class[5] + 1
                count_class_added[5] = count_class_added[5] + 1

    logger.info('sea lions found: ' + str(count1))
    logger.info('sea lions added to dataset: ' + str(count2))
    logger.info('non sea lions added to dataset: ' + str(count3))
    if has_target:
        logger.info('dataset size: ' + str(len(target_x_ds)))

    if (debug):
        utils.show_image(debug_image, size=40, is_bgr=True)
        utils.show_images(images, cols=10, is_bgr=True, size=1.5)

    return count_class, count_class_added, lion_positions, lion_classes