def extract_vectors_from_dataset(self, out_filename, type="test"):
    print("creating vectors ({})...".format(type))
    filename_suffix = ""
    if type == "test":
        dts = self.dataset.test
    elif type == "all":
        dts = self.dataset.all
        filename_suffix += "_all"
    else:
        raise ValueError("Dataset type is wrong ('type' param)")
    vct_file_name = dsf.file_cut_extension(out_filename) + filename_suffix + '.vct'
    labels_file_name = dsf.file_cut_extension(out_filename) + filename_suffix + '.labels'
    if 'features_layer' not in self.graph:
        # No explicit features layer: infer the vector size by pushing one
        # sample through the feature extractor.
        get_size_vector = self._extract_features(dts.images[:1])
        size_vector = numpy.prod(get_size_vector.shape)
    else:
        size_vector = numpy.prod(self.graph['features_layer'].get_shape().as_list()[1:])
    with open(vct_file_name, 'wb') as foutVectors:
        dsf.write_meta(foutVectors, [0, 0, len(dts.images), size_vector])
        fout_labels = None
        if dts.labels is not None:
            fout_labels = open(labels_file_name, 'wb')
            dsf.write_meta(fout_labels, [len(dts.labels), self.dataset.params['numClasses']])
        test_vectors = []
        # Data for embeddings needs to be stored in the saved TF model in a
        # Variable; the variable is named with the dataset file-name suffix.
        for test_input_batch, test_labels_batch in dts.split_to_batches(
                self.params['training_batch_size']):
            if dts.labels is not None and len(test_labels_batch.shape) != 1:
                test_labels_batch = dsf.one_hot_to_dense(test_labels_batch)
            converted_batch = self._extract_features(test_input_batch)
            test_vectors.append(converted_batch)
            converted_batch.flatten().tofile(foutVectors)
            if fout_labels is not None:
                test_labels_batch.tofile(fout_labels)
        if fout_labels is not None:
            fout_labels.close()
    test_vectors = numpy.concatenate(test_vectors, axis=0)
    if type == "test":
        self._setup_embedding_vector(
            index=dsf.extract_file_name_cut_extension(
                dsf.file_cut_extension(out_filename)).replace("-", ""),
            vectors=test_vectors)
    if 'embeded_vectors' in self.graph:
        tf.train.Saver(self.graph['embeded_vectors'], max_to_keep=2).save(
            self.session, os.path.join(self._log_dir, "modelEmbeded.ckpt"), 100000)
    print("vectors saved to files!")
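
# Usage sketch (hedged): 'model' stands for an already-constructed instance of
# the class this method belongs to, with a trained session and loaded dataset;
# the output base name is hypothetical.
#
#   model.extract_vectors_from_dataset("runs/mnist_run1", type="test")
#   model.extract_vectors_from_dataset("runs/mnist_run1", type="all")
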
def convert_mnist(image_path, labels_path):
    with gfile.Open(image_path, 'rb') as f:
        images = extract_images_mnist(f)
        with open("data/mnist/mnist.dat", 'wb') as foutPatches:
            # numRowsPatches, numColsPatches, totalPatches, patchSizeX, patchSizeY, channels
            dsf.write_meta(foutPatches, [
                0, 0, images.shape[0], images.shape[1], images.shape[2],
                images.shape[3]
            ])
            images.flatten().tofile(foutPatches)
        print("image '{}', patch size - {}x{}, patches - {}. Done.".format(
            image_path, images.shape[1], images.shape[2], images.shape[0]))
    with gfile.Open(labels_path, 'rb') as f:
        labels = extract_labels_mnist(f)
        with open("data/mnist/mnist.labels", 'wb') as foutLabels:
            # numpy.unique returns the distinct label values, i.e. the classes.
            classes = numpy.unique(labels)
            dsf.write_meta(foutLabels, [labels.shape[0], len(classes)])
            labels.tofile(foutLabels)
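
# Usage sketch (hedged): the IDX file names below follow the standard MNIST
# distribution but are assumptions here, as is the "data/mnist/" output
# layout that convert_mnist writes into.
def _demo_convert_mnist():
    convert_mnist("data/mnist/train-images-idx3-ubyte",
                  "data/mnist/train-labels-idx1-ubyte")
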
def patches2dat(image_path, patch_size, stride):
    image_name = path_leaf(image_path)
    patches_name = dsf.file_cut_extension(image_name) + "_" + str(patch_size) + ".dat"
    mask_name = dsf.file_cut_extension(image_path) + "_mask.bmp"
    labels_name = dsf.file_cut_extension(image_name) + "_" + str(patch_size) + ".labels"
    patches, img_shape = tf_slice_pic_to_patches(image_path, patch_size, stride)
    with open("data/sparc/" + patches_name, 'wb') as foutPatches:
        # numRowsPatches, numColsPatches, totalPatches, patchSizeX, patchSizeY, channels
        dsf.write_meta(foutPatches, [
            patches.shape[1], patches.shape[2],
            patches.shape[1] * patches.shape[2], patch_size, patch_size,
            img_shape[2]
        ])
        patches.flatten().tofile(foutPatches)
    if os.path.isfile(mask_name):
        # Derive per-patch labels from the mask: a patch is positive (1) when
        # more than 35 fully-white (255) mask pixels fall inside it.
        mask_patches, mask_shape = tf_slice_pic_to_patches(mask_name, patch_size, stride)
        mask_patches = mask_patches.astype(numpy.uint8).reshape(-1, patch_size ** 2)
        mask_patches = numpy.sum(mask_patches, axis=-1)
        mask_patches = (mask_patches > 35 * 255).astype(numpy.uint8)
        with open("data/sparc/" + labels_name, 'wb') as foutLabels:
            # totalPatches, numClasses
            classes, per_class = numpy.unique(mask_patches, return_counts=True)
            dsf.write_meta(foutLabels, [mask_patches.shape[0], len(classes)])
            mask_patches.tofile(foutLabels)
        print(per_class)
    else:
        mask_patches = None
    print("image '{}', patch size - {}x{}, patches - {}x{}, labels - {}. Done.".format(
        image_path, patch_size, patch_size, patches.shape[1], patches.shape[2],
        mask_patches.shape[0] if mask_patches is not None else "None"))
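
# Usage sketch (hedged): the image path, patch size, and stride are
# hypothetical. A mask named "<image>_mask.bmp" next to the source image is
# optional; when present, per-patch labels are derived from it as above.
def _demo_patches2dat():
    patches2dat("data/sparc/sample.bmp", patch_size=32, stride=16)
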
def convert_cifar(image_path):
    def rgb2gray(rgb):
        # ITU-R BT.601 luma weights for the RGB -> grayscale conversion.
        return numpy.dot(rgb[..., :3], [0.299, 0.587, 0.114]).astype(numpy.uint8)

    # Write the headers first; the five CIFAR-10 training batches (50000
    # 32x32 images, 10 classes) are appended below.
    with open("data/cifar/cifar.dat", 'wb') as foutPatches:
        dsf.write_meta(foutPatches, [0, 0, 50000, 32, 32, 1])
    with open("data/cifar/cifar.labels", 'wb') as foutLabels:
        dsf.write_meta(foutLabels, [50000, 10])
    for i in range(1, 6):
        with open(image_path + str(i), 'rb') as f:
            batch = pickle.load(f, encoding='bytes')
        shape = int(math.sqrt(batch[b'data'].shape[1] / 3))
        with open("data/cifar/cifar.dat", 'ab') as foutPatches:
            images = rgb2gray(batch[b'data'].reshape(
                (batch[b'data'].shape[0], shape, shape, 3), order='F'))
            images.flatten().tofile(foutPatches)
        with open("data/cifar/cifar.labels", 'ab') as foutLabels:
            labels = numpy.asarray(batch[b'labels']).astype(numpy.uint8)
            labels.tofile(foutLabels)
    print("image '{}', patch size - {}x{}, patches - {}. Done.".format(
        image_path, 32, 32, 50000))
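
# Usage sketch (hedged): image_path is the prefix of the CIFAR-10
# python-pickle training batches; convert_cifar appends the batch index
# 1..5 itself, so pass the path without the trailing number.
def _demo_convert_cifar():
    convert_cifar("data/cifar/cifar-10-batches-py/data_batch_")
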
def convert_bardot(img_path, classes_limit=None, min_pics_in_class=None,
                   max_pics_in_class=None, exclude_file_path=None, resize=None,
                   exclude_array=None):
    excluded_classes = None
    if exclude_file_path is not None:
        exclude_file_path = dsf.file_cut_extension(exclude_file_path)
        excluded_classes = json.load(open(exclude_file_path + '.json'))['classes']
        print("converting classes with filter of {} classes".format(
            len(excluded_classes)))
    images = []
    img_size = None
    if img_path[-1] != '/':
        img_path += '/'
    with open("data/bardot/bardot.dat.temp", 'wb') as foutPatches:
        with open("data/bardot/bardot.labels.temp", 'wb') as foutLabels:
            with open("data/bardot/bardot.meta", 'w') as foutMeta:
                image_count = 0
                for dir_name in subdirs(img_path):
                    # Category names look like "ABC 123"; anything else is skipped.
                    cat = re.match(r'^[A-Z]+ [0-9]+', dir_name)
                    if not cat:
                        print(dir_name + " -- bad category")
                        continue
                    cat = cat.group()
                    if (exclude_file_path is not None and cat in excluded_classes) \
                            or (exclude_array is not None and cat in exclude_array):
                        print(dir_name + " -- excluded category")
                        continue
                    cat_dir = os.listdir(img_path + dir_name)
                    # Does the class have enough images?
                    if min_pics_in_class is not None and len(cat_dir) < min_pics_in_class:
                        continue
                    # Would this class exceed the limit on the number of classes?
                    if cat not in images:
                        if classes_limit is None or classes_limit > len(images):
                            images.append(cat)
                        else:
                            continue
                    # Cap the number of pics per class.
                    if max_pics_in_class is not None:
                        cat_dir = cat_dir[:max_pics_in_class]
                    for entry in cat_dir:
                        if not re.match(r'.+\.png', entry):
                            continue
                        # Labels are stored as single bytes, so at most 256 classes.
                        value = images.index(cat)
                        foutLabels.write(value.to_bytes(1, byteorder='little'))
                        print("{}\t{}".format(entry, cat), file=foutMeta)
                        img = Image.open(img_path + dir_name + "/" + entry)
                        if resize is not None:
                            img = img.resize((resize, resize))
                        img_numpy = numpy.asarray(img)
                        img_size = img_numpy.shape
                        img_numpy.flatten().tofile(foutPatches)
                        image_count += 1
    if img_size is None:
        raise Exception("No eligible pictures found!")
    if exclude_file_path is None:
        out_file_base = "data/bardot/bardot"
    else:
        out_file_base = exclude_file_path + "-aug"
    if resize is not None:
        out_file_base += "-{}x{}".format(resize, resize)
    dsf.write_cfg("{}-{}.json".format(out_file_base, len(images)),
                  {"classes": images})
    # Now that image_count and img_size are known, prepend the meta headers
    # and copy the temporary payloads into the final files.
    with open("{}-{}.dat".format(out_file_base, len(images)), 'wb') as foutPatches:
        with open("data/bardot/bardot.dat.temp", 'rb') as finPatches:
            # Grayscale images have a 2-dim shape, so the channel count is 1.
            dsf.write_meta(foutPatches, [
                0, 0, image_count, img_size[0], img_size[1],
                img_size[2] if len(img_size) > 2 else 1
            ])
            chunk = finPatches.read(100000)
            while chunk:
                foutPatches.write(chunk)
                chunk = finPatches.read(100000)
    os.remove("data/bardot/bardot.dat.temp")
    with open("{}-{}.labels".format(out_file_base, len(images)), 'wb') as foutLabels:
        with open("data/bardot/bardot.labels.temp", 'rb') as finLabels:
            dsf.write_meta(foutLabels, [image_count, len(images)])
            chunk = finLabels.read(100000)
            while chunk:
                foutLabels.write(chunk)
                chunk = finLabels.read(100000)
    os.remove("data/bardot/bardot.labels.temp")
    os.rename("data/bardot/bardot.meta",
              "{}-{}.meta".format(out_file_base, len(images)))
    print("{} images of {}x{}x{} from {} classes converted".format(
        image_count, img_size[0], img_size[1],
        img_size[2] if len(img_size) > 2 else 1, len(images)))
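
# Usage sketch (hedged): all paths and filter values are hypothetical.
# Classes listed in excluded.json are skipped, at most 40 classes with at
# least 10 PNGs each are kept, and every image is resized to 64x64.
def _demo_convert_bardot():
    convert_bardot("data/bardot/raw",
                   classes_limit=40,
                   min_pics_in_class=10,
                   exclude_file_path="data/bardot/excluded.json",
                   resize=64)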