def extract_vectors_from_dataset(self, out_filename, type="test"):
    """Run feature extraction over a dataset split and dump the result to disk.

    Two files are derived from ``out_filename`` (extension cut off, plus an
    ``_all`` suffix for the "all" split):

    * ``<base>.vct``    - meta header ``[0, 0, n_images, vector_size]``
      followed by every extracted batch, flattened.
    * ``<base>.labels`` - meta header ``[n_labels, numClasses]`` followed by
      the label batches; only written when the split has labels.

    For the "test" split the extracted vectors are additionally handed to
    ``self._setup_embedding_vector``. If the graph holds an
    ``'embeded_vectors'`` entry, it is saved to ``modelEmbeded.ckpt`` inside
    ``self._log_dir``.

    Args:
        out_filename: path the output file names are derived from.
        type: ``"test"`` or ``"all"`` - which split of ``self.dataset`` to use.

    Raises:
        ValueError: if ``type`` is neither ``"test"`` nor ``"all"``.
    """
    print("creating vectors ({})...".format(type))
    if type == "test":
        dts = self.dataset.test
        filename_suffix = ""
    elif type == "all":
        dts = self.dataset.all
        filename_suffix = "_all"
    else:
        raise ValueError("Dataset type is wrong ('type' param)")

    base_name = dsf.file_cut_extension(out_filename) + filename_suffix
    vct_file_name = base_name + '.vct'
    labels_file_name = base_name + '.labels'

    if 'features_layer' not in self.graph:
        # No declared feature layer: probe with a single image to learn the
        # size of one extracted vector.
        probe = self._extract_features(dts.images[:1])
        size_vector = numpy.prod(probe.shape)
    else:
        size_vector = numpy.prod(self.graph['features_layer'].get_shape().as_list()[1:])

    has_labels = dts.labels is not None
    # Vectors are only needed in memory for the embedding set up below, which
    # happens for the "test" split only; other splits are streamed to disk.
    keep_vectors = type == "test"
    test_vectors = []

    fout_labels = None
    with open(vct_file_name, 'wb') as fout_vectors:
        dsf.write_meta(fout_vectors,
                       [0, 0, len(dts.images), size_vector])
        try:
            if has_labels:
                fout_labels = open(labels_file_name, 'wb')
                dsf.write_meta(fout_labels, [len(dts.labels), self.dataset.params['numClasses']])

            for test_input_batch, test_labels_batch in dts.split_to_batches(self.params['training_batch_size']):
                if has_labels and len(test_labels_batch.shape) != 1:
                    # Labels that are not 1-D are treated as one-hot and
                    # converted to dense class indices before being written.
                    test_labels_batch = dsf.one_hot_to_dense(test_labels_batch)
                converted_batch = self._extract_features(test_input_batch)
                if keep_vectors:
                    test_vectors.append(converted_batch)
                converted_batch.flatten().tofile(fout_vectors)
                if has_labels:
                    test_labels_batch.tofile(fout_labels)
        finally:
            # Close the labels file even when extraction or a write fails.
            if fout_labels is not None:
                fout_labels.close()

    if keep_vectors:
        # Data for embeddings needs to be stored in the saved TF model in a
        # Variable; the variable is named after the dataset file name.
        test_vectors = numpy.concatenate(test_vectors, axis=0)
        self._setup_embedding_vector(
            index=dsf.extract_file_name_cut_extension(dsf.file_cut_extension(out_filename)).replace("-", ""),
            vectors=test_vectors)
    if 'embeded_vectors' in self.graph:
        tf.train.Saver(self.graph['embeded_vectors'], max_to_keep=2).save(
            self.session,
            os.path.join(self._log_dir, "modelEmbeded.ckpt"),
            100000)

    print("vectors saved to files!")
Ejemplo n.º 2
0
def convert_mnist(image_path, labels_path):
    """Convert MNIST image and label files into ``data/mnist/mnist.*`` files.

    Images are read with ``extract_images_mnist`` and written to
    ``data/mnist/mnist.dat`` after a meta header of
    ``[0, 0, shape[0], shape[1], shape[2], shape[3]]``. Labels are read with
    ``extract_labels_mnist`` and written to ``data/mnist/mnist.labels`` after
    a meta header of ``[number of labels, number of distinct label values]``.

    Args:
        image_path: path of the MNIST image file, opened via ``gfile.Open``.
        labels_path: path of the MNIST label file, opened via ``gfile.Open``.
    """
    # --- images -----------------------------------------------------------
    with gfile.Open(image_path, 'rb') as image_src:
        images = extract_images_mnist(image_src)
    dims = images.shape
    with open("data/mnist/mnist.dat", 'wb') as dat_out:
        image_meta = [0, 0] + [dims[axis] for axis in range(4)]
        dsf.write_meta(dat_out, image_meta)
        images.flatten().tofile(dat_out)
    summary = "image '{}', patch size - {}x{}, patches - {}. Done.".format(
        image_path, dims[1], dims[2], dims[0])
    print(summary)

    # --- labels -----------------------------------------------------------
    with gfile.Open(labels_path, 'rb') as labels_src:
        labels = extract_labels_mnist(labels_src)
    with open("data/mnist/mnist.labels", 'wb') as labels_out:
        distinct_labels = numpy.unique(labels)
        label_meta = [labels.shape[0], len(distinct_labels)]
        dsf.write_meta(labels_out, label_meta)
        labels.tofile(labels_out)
Ejemplo n.º 3
0
def patches2dat(image_path, patch_size, stride):
    """Slice an image into patches and store them (plus mask labels) on disk.

    The patches returned by ``tf_slice_pic_to_patches`` are written to
    ``data/sparc/<image>_<patch_size>.dat``. If a file named
    ``<image_path without extension>_mask.bmp`` exists, it is sliced the same
    way and reduced to one binary label per patch, written to
    ``data/sparc/<image>_<patch_size>.labels``.

    Args:
        image_path: path of the source image.
        patch_size: edge length of the square patches.
        stride: stride handed to ``tf_slice_pic_to_patches``.
    """
    image_name = path_leaf(image_path)
    out_stem = dsf.file_cut_extension(image_name) + "_" + str(patch_size)
    patches_name = out_stem + ".dat"
    labels_name = out_stem + ".labels"
    # The mask is looked up next to the source image, not in the output dir.
    mask_name = dsf.file_cut_extension(image_path) + "_mask.bmp"

    patches, img_shape = tf_slice_pic_to_patches(image_path, patch_size,
                                                 stride)
    with open("data/sparc/" + patches_name, 'wb') as fout_patches:
        # numRowsPatches, numColsPatches, totalPatches, patchSizeX, patchSizeY, channels
        dsf.write_meta(fout_patches, [
            patches.shape[1], patches.shape[2], patches.shape[1] *
            patches.shape[2], patch_size, patch_size, img_shape[2]
        ])
        patches.flatten().tofile(fout_patches)

    mask_patches = None
    if os.path.isfile(mask_name):
        mask_patches, _ = tf_slice_pic_to_patches(mask_name, patch_size,
                                                  stride)
        mask_patches = mask_patches.astype(numpy.uint8).reshape(
            -1, patch_size**2)
        patch_sums = numpy.sum(mask_patches, axis=-1)
        # A patch gets label 1 when its summed mask values exceed 35 * 255
        # (the sum of 35 pixels at the uint8 maximum), otherwise label 0.
        # Vectorized comparison instead of numpy.vectorize over a lambda.
        mask_patches = (patch_sums > 35 * 255).astype(numpy.uint8)
        classes, per_class = numpy.unique(mask_patches, return_counts=True)
        with open("data/sparc/" + labels_name, 'wb') as fout_labels:
            # totalLabels, numClasses
            dsf.write_meta(fout_labels, [mask_patches.shape[0], len(classes)])
            mask_patches.tofile(fout_labels)
        print(per_class)

    print(
        "image '{}', patch size - {}x{}, patches - {}x{}, labels - {}. Done.".
        format(image_path, patch_size, patch_size, patches.shape[1],
               patches.shape[2],
               mask_patches.shape[0] if mask_patches is not None else "None"))
Ejemplo n.º 4
0
def convert_cifar(image_path):
    """Convert five pickled CIFAR batch files into grayscale .dat/.labels files.

    Batch files are expected at ``image_path + "1"`` .. ``image_path + "5"``.
    Output goes to ``data/cifar/cifar.dat`` (meta ``[0, 0, 50000, 32, 32, 1]``
    followed by the grayscale images) and ``data/cifar/cifar.labels`` (meta
    ``[50000, 10]`` followed by the labels as uint8).

    Args:
        image_path: common path prefix of the batch files; the batch number
            1-5 is appended to it.
    """
    dat_path = "data/cifar/cifar.dat"
    labels_path = "data/cifar/cifar.labels"

    def rgb2gray(rgb):
        # Weighted sum over the first three channels, truncated to uint8.
        weighted = numpy.dot(rgb[..., :3], [0.299, 0.587, 0.114])
        return weighted.astype(numpy.uint8)

    # (Re)create both output files so they start with their meta headers;
    # the batches are appended below.
    with open(dat_path, 'wb') as dat_out:
        dsf.write_meta(dat_out, [0, 0, 50000, 32, 32, 1])

    with open(labels_path, 'wb') as labels_out:
        dsf.write_meta(labels_out, [50000, 10])

    for batch_no in range(1, 6):
        with open(image_path + str(batch_no), 'rb') as batch_file:
            # NOTE(review): pickle.load can execute arbitrary code - only
            # feed this function batch files from a trusted source.
            batch = pickle.load(batch_file, encoding='bytes')
            # Edge length of a square 3-channel image stored as a flat row.
            side = int(math.sqrt(batch[b'data'].shape[1] / 3))
        with open(dat_path, 'ab') as dat_out:
            raw = batch[b'data']
            # NOTE(review): the Fortran-order reshape fixes how the flat row
            # maps to (row, col, channel) - confirm the resulting orientation
            # is the intended one.
            cube = raw.reshape((raw.shape[0], side, side, 3), order='F')
            rgb2gray(cube).flatten().tofile(dat_out)
        with open(labels_path, 'ab') as labels_out:
            numpy.asarray(batch[b'labels']).astype(numpy.uint8).tofile(
                labels_out)

    print("image '{}', patch size - {}x{}, patches - {}. Done.".format(
        image_path, 32, 32, 50000))
Ejemplo n.º 5
0
def convert_bardot(img_path,
                   classes_limit=None,
                   min_pics_in_class=None,
                   max_pics_in_class=None,
                   exclude_file_path=None,
                   resize=None,
                   exclude_array=None):
    """Convert a directory tree of per-class PNG images into dataset files.

    Every sub-directory of ``img_path`` whose name starts with a category of
    the form ``[A-Z]+ [0-9]+`` is treated as one class. Accepted images are
    streamed into temporary files and then re-written, prefixed with a meta
    header, as ``<base>-<n_classes>.dat`` / ``.labels``; a ``.json`` file with
    the class list and a ``.meta`` file with one ``file<TAB>class`` line per
    image are produced as well. ``<base>`` is ``data/bardot/bardot`` or, when
    ``exclude_file_path`` is given, that path (extension cut) plus ``-aug``;
    ``-<resize>x<resize>`` is appended when ``resize`` is set.

    Args:
        img_path: root directory containing one sub-directory per class.
        classes_limit: maximum number of classes to accept (None = no limit).
        min_pics_in_class: skip classes whose directory has fewer entries.
        max_pics_in_class: only look at the first N directory entries.
        exclude_file_path: path of a JSON file whose ``"classes"`` list names
            categories to skip.
        resize: if set, every image is resized to ``resize`` x ``resize``.
        exclude_array: extra collection of categories to skip.

    Raises:
        Exception: if no eligible picture was found.
    """
    excluded_classes = None
    if exclude_file_path is not None:
        exclude_file_path = dsf.file_cut_extension(exclude_file_path)
        with open(exclude_file_path + '.json') as fin_exclude:
            excluded_classes = json.load(fin_exclude)['classes']
        print("converting classes with filter of {} classes".format(
            len(excluded_classes)))

    images = []  # accepted class names; a class's label is its index here
    img_size = None
    image_count = 0
    if not img_path.endswith('/'):
        img_path += '/'

    with open("data/bardot/bardot.dat.temp", 'wb') as fout_patches, \
            open("data/bardot/bardot.labels.temp", 'wb') as fout_labels, \
            open("data/bardot/bardot.meta", 'w') as fout_meta:
        for class_dir in subdirs(img_path):
            cat = re.match(r'^[A-Z]+ [0-9]+', class_dir)
            if not cat:
                print(class_dir + " -- bad category")
                continue
            cat = cat.group()
            if (excluded_classes is not None and cat in excluded_classes) \
                    or (exclude_array is not None and cat in exclude_array):
                print(class_dir + " -- excluded category")
                continue
            cat_dir = os.listdir(img_path + class_dir)
            # class has enough images?
            if min_pics_in_class is not None and len(
                    cat_dir) < min_pics_in_class:
                continue
            # exceed limit of classes?
            if cat not in images:
                if classes_limit is None or classes_limit > len(images):
                    images.append(cat)
                else:
                    continue
            # exceeds number of pics?
            if max_pics_in_class is not None:
                cat_dir = cat_dir[:max_pics_in_class]

            class_index = images.index(cat)
            for entry in cat_dir:
                if not re.match(r'.+\.png', entry):
                    continue
                # Labels are stored as one byte each, so this raises
                # OverflowError once the class index no longer fits a byte.
                fout_labels.write(
                    class_index.to_bytes(1, byteorder='little'))
                print("{}\t{}".format(entry, cat), file=fout_meta)
                img = Image.open(img_path + class_dir + "/" + entry)
                if resize is not None:
                    img = img.resize((resize, resize))
                img_numpy = numpy.asarray(img)
                # The shape of the last converted image ends up in the meta
                # header.
                img_size = img_numpy.shape
                img_numpy.flatten().tofile(fout_patches)
                image_count += 1

    if img_size is None:
        raise Exception("No eligible pictures found!")
    if exclude_file_path is None:
        out_file_base = "data/bardot/bardot"
    else:
        out_file_base = exclude_file_path + "-aug"
    if resize is not None:
        out_file_base += "-{}x{}".format(resize, resize)

    n_classes = len(images)
    # Arrays without a third dimension are reported as single-channel.
    channels = img_size[2] if len(img_size) > 2 else 1

    dsf.write_cfg("{}-{}.json".format(out_file_base, n_classes),
                  {"classes": images})
    _bardot_finalize("{}-{}.dat".format(out_file_base, n_classes),
                     [0, 0, image_count, img_size[0], img_size[1], channels],
                     "data/bardot/bardot.dat.temp")
    _bardot_finalize("{}-{}.labels".format(out_file_base, n_classes),
                     [image_count, n_classes],
                     "data/bardot/bardot.labels.temp")
    os.rename("data/bardot/bardot.meta",
              "{}-{}.meta".format(out_file_base, n_classes))
    print("{} images of {}x{}x{} from {} classes converted".format(
        image_count, img_size[0], img_size[1], channels, n_classes))


def _bardot_finalize(out_path, meta, temp_path, chunk_size=100000):
    """Write ``meta`` plus the contents of ``temp_path`` to ``out_path``, then delete ``temp_path``."""
    with open(out_path, 'wb') as fout, open(temp_path, 'rb') as fin:
        dsf.write_meta(fout, meta)
        for chunk in iter(lambda: fin.read(chunk_size), b''):
            fout.write(chunk)
    os.remove(temp_path)