Ejemplo n.º 1
0
    def __getitem__(self, index):
        """Build training batch number ``index`` of image pairs and labels.

        Rows alternate between a pair taken from ``self.match`` (label 1) and
        a pair taken from ``self.unmatch`` (label 0).

        Args:
            index: Zero-based batch number.

        Returns:
            ``([a, b], c)`` where ``a`` and ``b`` hold the first and second
            image of every pair and ``c`` is a ``(size, 1)`` array of labels
            with dtype ``K.floatx()``.

        Raises:
            AssertionError: If ``index`` points past the last batch.
        """
        start = self.batch_size * index
        end = min(start + self.batch_size, len(self.match) + len(self.unmatch))
        size = end - start
        assert size > 0, 'batch index {} is out of range'.format(index)

        def _load_as_batch_of_one(path):
            # A leading axis of length 1 lets the images be concatenated below.
            return np.expand_dims(read_for_training(path), 0).astype(K.floatx())

        # Every (match, unmatch) couple fills two batch rows, so this batch
        # needs couples start // 2 up to start // 2 + ceil(size / 2).
        # NOTE(review): the pairing assumes `start` is even, i.e. an even
        # batch_size -- confirm against the class constructor.
        first = start // 2
        last = first + (size + 1) // 2
        paths = []
        for m, u in zip(self.match[first:last], self.unmatch[first:last]):
            paths.extend((m[0], m[1], u[0], u[1]))
        # An odd-sized batch only uses the match half of its last couple; drop
        # the surplus paths so a and b end up with exactly `size` rows, like c.
        paths = paths[:2 * size]

        images = np.concatenate(self.p.map(_load_as_batch_of_one, paths), 0)
        images = preprocess_image(images)

        # Flattened order is a[0], b[0], a[1], b[1], a[2], b[2], ...
        a = images[0::2]
        b = images[1::2]
        c = np.zeros((size, 1), dtype=K.floatx())
        # Even rows are matches; odd rows keep their initial 0 (different
        # whales). The strided write is also safe when `size` is odd.
        c[0::2, 0] = 1

        return [a, b], c
def process_train_images_with_branch_model(branch_model, fold):
    """Return branch-model predictions for the tagged training images.

    Results are cached per fold under ``CACHE_PATH``; when the cache file
    already exists it is loaded instead of running the model again.

    Args:
        branch_model: Model whose ``predict`` is called on one image at a time.
        fold: Value formatted into the cache file name.

    Returns:
        Tuple ``(train_res, train_ids, labels)``: the stacked predictions, the
        ids of the images they belong to, and a dict mapping each of those ids
        to the position of its class in ``get_classes_array()``.
    """
    cache_path = CACHE_PATH + 'branch_res_train_fold_{}.pklz'.format(fold)

    if os.path.isfile(cache_path):
        print('Restore train from cache: {}'.format(cache_path))
        train_res, train_ids, labels = load_from_file(cache_path)
        return train_res, train_ids, labels

    print('Go branch model train...')
    started_at = time.time()
    boxes = get_boxes()
    class_names = get_classes_array()
    tagged = get_tagged_data()

    kept_ids = []
    labels = {}
    predictions = []
    n_missed = 0
    for image_id in sorted(tagged.keys()):
        class_name = tagged[image_id]
        image_path = expand_path(image_id)
        class_index = class_names.index(class_name)
        if image_id not in boxes:
            # Images without a bounding box are skipped entirely.
            print('Missed bbox for {}!'.format(image_id))
            n_missed += 1
            continue

        box = boxes[image_id]
        labels[image_id] = class_index
        kept_ids.append(image_id)
        crop = read_cropped_image(
            image_path, box[0], box[1], box[2], box[3], False,
            img_shape=(BOX_SIZE, BOX_SIZE, 3))
        # The model is fed a batch of one image; keep only that single row.
        batch = preprocess_image(
            np.expand_dims(crop, axis=0).astype(np.float32))
        predictions.append(branch_model.predict(batch)[0])

    train_res = np.array(predictions)
    train_ids = np.array(kept_ids)

    print('Train preds: {}'.format(train_res.shape))
    print('Missed bboxes: {}'.format(n_missed))
    print('Read train time: {:.0f} sec'.format(time.time() - started_at))
    save_in_file((train_res, train_ids, labels), cache_path)

    return train_res, train_ids, labels
Ejemplo n.º 3
0
    def __getitem__(self, index):
        """Return batch number ``index`` of preprocessed images.

        Args:
            index: Zero-based batch number.

        Returns:
            The result of ``preprocess_image`` applied to the images read from
            this batch's slice of ``self.data``, concatenated along axis 0.
        """
        first = self.batch_size * index
        # The last batch may be shorter than batch_size.
        count = min(len(self.data) - first, self.batch_size)
        batch_items = self.data[first:first + count]

        def _load_with_batch_axis(item):
            image = read_for_validation(item)
            # A leading axis of length 1 lets the images be concatenated below.
            return np.expand_dims(image, 0).astype(K.floatx())

        loaded = self.p.map(_load_with_batch_axis, batch_items)
        return preprocess_image(np.concatenate(loaded, 0))
def process_tst_images_with_branch_model(branch_model, fold):
    """Return branch-model predictions for the test images.

    The images are the ones listed in the ``Image`` column of
    ``sample_submission.csv`` under ``INPUT_PATH``. Results are cached per
    fold under ``CACHE_PATH``; when the cache file already exists it is loaded
    instead of running the model again.

    Args:
        branch_model: Model whose ``predict`` is called on one image at a time.
        fold: Value formatted into the cache file name.

    Returns:
        Tuple ``(test_res, test_ids)``: the stacked predictions and the image
        names they belong to, in CSV row order.
    """
    cache_path = CACHE_PATH + 'branch_res_test_fold_{}.pklz'.format(fold)

    if os.path.isfile(cache_path):
        print('Restore test from cache: {}'.format(cache_path))
        test_res, test_ids = load_from_file(cache_path)
        return test_res, test_ids

    print('Go branch model test...')
    started_at = time.time()
    boxes = get_boxes()
    n_missed = 0
    test_df = pd.read_csv(INPUT_PATH + 'sample_submission.csv')

    names = []
    predictions = []
    for _, row in test_df.iterrows():
        image_id = row['Image']
        image_path = INPUT_PATH + 'test/' + image_id
        if image_id in boxes:
            box = boxes[image_id]
        else:
            # No bounding box known: fall back to the full image extent.
            print('Missed bbox for {}!'.format(image_id))
            n_missed += 1
            full = read_single_image(image_path)
            box = (0, 0, full.shape[1], full.shape[0])

        crop = read_cropped_image(
            image_path, box[0], box[1], box[2], box[3], False,
            img_shape=(BOX_SIZE, BOX_SIZE, 3))
        # The model is fed a batch of one image; keep only that single row.
        batch = preprocess_image(
            np.expand_dims(crop, axis=0).astype(np.float32))
        predictions.append(branch_model.predict(batch)[0])
        names.append(image_id)

    test_res = np.array(predictions)
    test_ids = np.array(names)

    print('Test preds: {}'.format(test_res.shape))
    print('Missed bboxes: {}'.format(n_missed))
    print('Read test time: {:.0f} sec'.format(time.time() - started_at))
    save_in_file((test_res, test_ids), cache_path)

    return test_res, test_ids