def initialize(self, device, **kwargs):
    """Checks the linked vectors and prepares the backward cutter.

    The input must be assigned and 4-dimensional, the first two padding
    values must be non-negative and err_output must be assigned with a
    size matching the cropped shape.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if any of the checks above fails.
    """
    input_missing = not self.input
    if input_missing or len(self.input.shape) != 4:
        raise error.BadFormatError(
            "input should be assigned and have shape of 4: "
            "(n_samples, sy, sx, n_channels)")
    if self.padding[0] < 0 or self.padding[1] < 0:
        raise error.BadFormatError(
            "padding[0], padding[1] should not be less than zero")
    if not self.err_output:
        raise error.BadFormatError("err_output should be assigned")
    super(GDCutter, self).initialize(device=device, **kwargs)

    # Shrink axes 2 and 1 by the corresponding pairs of padding values.
    cut_shape = list(self.input.shape)
    cut_shape[2] = cut_shape[2] - (self.padding[0] + self.padding[2])
    cut_shape[1] = cut_shape[1] - (self.padding[1] + self.padding[3])
    if cut_shape[2] <= 0 or cut_shape[1] <= 0:
        raise error.BadFormatError("Resulted output shape is empty")

    expected_size = int(numpy.prod(cut_shape))
    if expected_size != self.err_output.size:
        raise error.BadFormatError(
            "Computed err_output size differs from an assigned one")
    self.output_shape = cut_shape

    if self.err_input:
        assert self.err_input.shape == self.input.shape
    else:
        self.err_input.reset(numpy.zeros_like(self.input.mem))

    self.init_vectors(self.err_output, self.err_input)
    self.create_stuff("dst")
def initialize(self, device, **kwargs):
    """Checks the input, computes the cropped shape and prepares output.

    The input must be assigned and 4-dimensional and the first two padding
    values must be non-negative. The output is allocated with the cropped
    shape unless it has already been assigned.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if the input or padding are invalid or
        the cropped shape is empty.
    """
    if not self.input or len(self.input.shape) != 4:
        raise error.BadFormatError(
            "input should be assigned and have shape of 4: "
            "(n_samples, sy, sx, n_channels)")
    if self.padding[0] < 0 or self.padding[1] < 0:
        raise error.BadFormatError(
            "padding[0], padding[1] should not be less than zero")
    super(Cutter, self).initialize(device=device, **kwargs)

    shape = list(self.input.shape)
    shape[2] -= self.padding[0] + self.padding[2]
    shape[1] -= self.padding[1] + self.padding[3]
    if shape[2] <= 0 or shape[1] <= 0:
        raise error.BadFormatError("Resulted output shape is empty")
    self.output_shape = shape

    if not self.output:
        self.output.reset(numpy.zeros(self.output_shape, self.input.dtype))
    else:
        # output_shape is assigned from a list above, and a list never
        # compares equal to a tuple; normalise both sides so that a
        # correctly pre-allocated output passes the check.
        assert tuple(self.output.shape) == tuple(self.output_shape)

    for vec in self.input, self.output:
        vec.initialize(self.device)
    self.create_stuff("src")
def run(self):
    """Accumulates a histogram of the input values into the output.

    The value range is chosen from self.type ("relu" or "tanh"). Values
    below the range are counted in output[0], values above it in
    output[self.bars + 1]; values inside the closed range
    [self.min, self.max] are counted in the bin computed from their
    offset from self.min.

    :raises error.BadFormatError: if self.type is not supported.
    """
    if self.type == "relu":
        self.max = 10000
        self.min = 0
    elif self.type == "tanh":
        self.max = 1.7159
        self.min = -1.7159
    else:
        raise error.BadFormatError("Unsupported type %s" % self.type)

    span = self.max - self.min
    if not span:
        return
    self.output.map_write()
    self.input.map_read()
    # float() keeps the scale fractional even where "/" is integer division.
    scale = (self.bars - 1) / float(span)
    if self.reset_flag:
        self.output.mem[:] = 0
    self.n_bars[0] = self.bars + 2

    for y in self.input.mem.ravel():
        if y < self.min:
            self.output[0] += 1
        elif y <= self.max:
            # Includes y == self.min, which previously fell through to the
            # overflow bin because of the strict "y > self.min" test.
            self.output[int(numpy.floor((y - self.min) * scale))] += 1
        else:
            self.output[self.bars + 1] += 1
def initialize(self, device, **kwargs):
    """Rejects a missing input or an output sharing memory with it.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if input is not set or eq_addr() reports
        that input and output memory are the same.
    """
    message = "input should be set and should not be equal to output"
    if not self.input:
        raise error.BadFormatError(message)
    if self.output is not None and eq_addr(self.input.mem,
                                           self.output.mem):
        raise error.BadFormatError(message)
    super(BackwardTanhLog, self).initialize(device=device, **kwargs)
    self.output.initialize(self.device)
def load_data(self):
    """Loads the three pickled sets and copies them into the originals.

    Sets 0 and 1 must have the same per-sample shape as set 2 whenever
    they are not empty.

    :raises error.BadFormatError: on a sample shape mismatch.
    """
    total_pickles = sum(len(p) for p in self._pickles)
    pbar = ProgressBar(maxval=total_pickles, term_width=40)
    self.info("Loading %d pickles...", pbar.maxval)
    pbar.start()
    loaded = []
    for index in range(3):
        loaded.append(self.load_pickles(index, self._pickles[index], pbar))
    pbar.finish()

    self.info("Initializing the arrays...")
    shape = loaded[2][1][0].shape[1:]
    for index in range(2):
        if not loaded[index][0] > 0:
            continue
        other_shape = loaded[index][1][0].shape[1:]
        if shape != other_shape:
            raise error.BadFormatError(
                "TRAIN and %s sets have the different sample shape "
                "(%s vs %s)" % (CLASS_NAME[index], shape, other_shape))
    self.create_originals(self.reshape(shape))

    # Data and labels are written back to back, set after set.
    data_pos = 0
    labels_pos = 0
    for entry in loaded:
        if entry[0] == 0:
            continue
        for arr in entry[1]:
            data_end = data_pos + arr.shape[0]
            self.original_data[data_pos:data_end] = self.transform_data(arr)
            data_pos = data_end
        for arr in entry[2]:
            labels_end = labels_pos + arr.shape[0]
            self.original_labels[labels_pos:labels_end] = arr
            labels_pos = labels_end
def _load_label(self, key, has_labels): label = self.get_image_label(key) if label is not None: has_labels = True if has_labels and label is None: raise error.BadFormatError( "%s does not have a label, but others do" % key) return label, has_labels
def load_pickles(self, index, pickles, pbar):
    """Loads the pickles of one set and validates their contents.

    Every pickle must contain a dict with "data" and "labels" entries.

    :param index: set index, used for CLASS_NAME and self.class_lengths.
    :param pickles: iterable of paths to the pickle files.
    :param pbar: progress bar; inc() is called after each loaded file.
    :return: tuple (number of samples, list of data arrays, list of label
        arrays).
    :raises TypeError: if an unpickled object is not a dict.
    :raises error.BadFormatError: if the arrays have different sample
        shapes or the numbers of samples and labels differ.
    """
    unpickled = []
    for pick in pickles:
        try:
            with open(pick, "rb") as fin:
                self.debug("Loading %s...", pick)
                # NOTE(review): pickle.load() executes arbitrary code from
                # the file; only use it on trusted datasets.
                if six.PY3:
                    loaded = pickle.load(fin, encoding='charmap')
                else:
                    loaded = pickle.load(fin)
            unpickled.append(loaded)
            pbar.inc()
        except Exception as e:
            self.warning("Failed to load %s (part of %s set)",
                         pick, CLASS_NAME[index])
            raise from_none(e)

    data = []
    labels = []
    for obj, pick in zip(unpickled, pickles):
        if not isinstance(obj, dict):
            raise TypeError("%s has the wrong format (part of %s set)" %
                            (pick, CLASS_NAME[index]))
        try:
            data.append(obj["data"])
            labels.append(
                numpy.array(obj["labels"], dtype=Loader.LABEL_DTYPE))
        except KeyError as e:
            self.error("%s has the wrong format (part of %s set)",
                       pick, CLASS_NAME[index])
            raise from_none(e)

    n_labels = sum(len(l) for l in labels)
    n_samples = 0
    for arr in data:
        n_samples += arr.shape[0]
        if arr.shape[1:] != data[0].shape[1:]:
            raise error.BadFormatError(
                "Array has a different shape: expected %s, got %s "
                "(%s set)" % (data[0].shape[1:], arr.shape[1:],
                              CLASS_NAME[index]))
    if n_samples != n_labels:
        # The arguments must be a tuple: formatting two %d with a list
        # raises TypeError and hides the real problem.
        raise error.BadFormatError(
            "Data and labels has the different number of samples (data %d,"
            " labels %d)" % (n_samples, n_labels))
    self.class_lengths[index] = n_samples
    return n_samples, data, labels
def fill_array(self, filling, array, stddev):
    """Fills array in place according to the requested filling type.

    :param filling: "uniform", "gaussian" or "constant".
    :param array: the array to fill.
    :param stddev: bound of the uniform range, second argument of the
        normal fill or the constant value, depending on filling.
    :raises error.BadFormatError: on any other filling type.
    """
    if filling == "uniform":
        self.rand.fill(array, -stddev, stddev)
        return
    if filling == "gaussian":
        self.rand.fill_normal_real(array, 0, stddev)
        return
    if filling == "constant":
        array[:] = stddev
        return
    raise error.BadFormatError("Invalid filling type %s" % filling)
def initialize(self, device, **kwargs):
    """Sets the kernel name and checks err_output against input_offset.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if the sizes of err_output and
        input_offset differ.
    """
    self.kernel_name = "gd_max_pooling"
    super(GDMaxPooling, self).initialize(device=device, **kwargs)

    sizes_match = self.err_output.size == self.input_offset.size
    if not sizes_match:
        raise error.BadFormatError("Shape of err_output differs from "
                                   "that of input_offset")
    self.input_offset.initialize(self.device)
def load_data(self):
    """Loads the (2, 1000) text dataset into original_data as (1000, 2).

    The file name is taken from root.kohonen.loader.dataset_file. All 1000
    samples are assigned to class index 2.

    :raises error.BadFormatError: if the file cannot be loaded or the
        loaded data does not have the shape (2, 1000).
    """
    file_name = root.kohonen.loader.dataset_file
    try:
        data = numpy.loadtxt(file_name)
    except Exception:
        # Not a bare "except:" so that KeyboardInterrupt and SystemExit
        # are not turned into a format error.
        raise error.BadFormatError("Could not load data from %s" %
                                   file_name)
    if data.shape != (2, 1000):
        raise error.BadFormatError("Data in %s has the invalid shape" %
                                   file_name)

    self.original_data.mem = numpy.zeros((1000, 2), dtype=self.dtype)
    self.original_data.mem[:, 0] = data[0]
    self.original_data.mem[:, 1] = data[1]

    self.class_lengths[0] = 0
    self.class_lengths[1] = 0
    self.class_lengths[2] = 1000
def initialize(self, device, **kwargs):
    """Validates output_offset and (re)allocates the output if needed.

    The output is allocated with the shape of output_shape_source when it
    is not assigned yet or its first dimension differs.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if output_offset has a wrong size or is
        not of int32 dtype.
    """
    super(Depooling, self).initialize(device, **kwargs)

    if self.output_offset.size != self.input.size:
        raise error.BadFormatError("output_offset.size != input.size")
    if self.output_offset.dtype != numpy.int32:
        raise error.BadFormatError("output_offset.dtype != numpy.int32")

    must_allocate = True
    if self.output:
        assert self.output.shape[1:] == self.output_shape_source.shape[1:]
        must_allocate = (self.output.shape[0] !=
                         self.output_shape_source.shape[0])
    if must_allocate:
        self.output.reset(numpy.zeros(self.output_shape_source.shape,
                                      dtype=self.input.dtype))

    self.init_vectors(self.input, self.output_offset, self.output)
def open_hdf5(self, index):
    """Opens the HDF5 file of the given set and returns its datasets.

    :param index: set index into self._files and self.class_lengths.
    :return: tuple (data, labels); labels is None when the file has no
        "label" dataset, and (None, None) is returned when no file is
        configured for the set.
    :raises error.BadFormatError: if label presence is inconsistent
        between sets or data and labels have different lengths.
    """
    path = self._files[index]
    if not path:
        return None, None
    # Open read-only explicitly: without a mode, older h5py versions open
    # the file read/write and may create it if it does not exist.
    h5f = h5py.File(path, "r")
    data = h5f["data"]
    has_labels = "label" in h5f
    if self.has_labels and not has_labels or \
            not self.has_labels and has_labels and \
            self.total_samples > 0:
        raise error.BadFormatError("Some sets have labels and some do not")
    self._has_labels = has_labels
    labels = h5f["label"] if self.has_labels else None
    if self.has_labels and len(data) != len(labels):
        raise error.BadFormatError(
            "%s: data and labels have different lengths" % path)
    self.class_lengths[index] = len(data)
    self.shape = data.shape[1:]
    return data, labels
def initialize(self, **kwargs):
    """Loads the data, prepares the minibatch buffers and shuffles.

    The "minibatch_size" keyword argument, if given, overrides
    max_minibatch_size.

    :raises error.BadFormatError: if create_minibatch_data() leaves
        minibatch_data uninitialized.
    """
    if self.testing:
        self.shuffle_limit = 0
        self.global_offset = 0
        del self.failed_minibatches[:]

    try:
        super(Loader, self).initialize(**kwargs)
    except AttributeError:
        pass

    try:
        self.load_data()
    except AttributeError as e:
        self.exception("Failed to load the data")
        raise from_none(e)

    if self.class_lengths[TRAIN] > 0:
        self.reset_normalization()
    self.max_minibatch_size = kwargs.get("minibatch_size",
                                         self.max_minibatch_size)
    self.on_before_create_minibatch_data()
    self._calc_class_end_offsets()

    sn_log_str = "Samples number: test: %d, validation: %d, train: %d"
    if self.train_ratio == 1.0:
        self.info(sn_log_str, *self.class_lengths)
    else:
        used = (self.effective_class_end_offsets[TRAIN] -
                self.effective_class_end_offsets[VALID])
        self.info(sn_log_str + " (used: %d)",
                  *(self.class_lengths + [used]))

    if self.has_labels:
        labels_mem = numpy.zeros(self.max_minibatch_size,
                                 dtype=Loader.LABEL_DTYPE)
    else:
        labels_mem = None
    self.minibatch_labels.reset(labels_mem)
    self.raw_minibatch_labels[:] = (None, ) * self.max_minibatch_size
    self.minibatch_indices.reset(
        numpy.zeros(self.max_minibatch_size, dtype=Loader.INDEX_DTYPE))

    try:
        self.create_minibatch_data()
    except Exception as e:
        self.error("Failed to create minibatch data")
        raise from_none(e)
    if not self.minibatch_data:
        raise error.BadFormatError("minibatch_data MUST be initialized in "
                                   "create_minibatch_data()")

    self.analyze_dataset()
    if self.testing:
        self.shuffled_indices.mem = None
    if self.testing or not self.restored_from_snapshot:
        self.shuffle()
def load_original(self, offs, labels_count, labels_fnme, images_fnme):
    """Reads one pair of MNIST label/image files into the originals.

    :param offs: offset in original_labels / original_data to write at.
    :param labels_count: the expected number of labels in the file.
    :param labels_fnme: path to the labels file.
    :param images_fnme: path to the images file.
    :raises error.BadFormatError: on a wrong header, count, image size,
        truncated file or labels outside of 0..9.
    """
    # Labels: two big-endian int32 (magic 2049, count), then raw bytes.
    with open(labels_fnme, "rb") as fin:
        magic, = struct.unpack(">i", fin.read(4))
        if magic != 2049:
            raise error.BadFormatError("Wrong header in train-labels")
        n_labels, = struct.unpack(">i", fin.read(4))
        if n_labels != labels_count:
            raise error.BadFormatError("Wrong number of labels in "
                                       "train-labels")
        label_buf = numpy.zeros(n_labels, dtype=numpy.byte)
        n_read = fin.readinto(label_buf)
        if n_read != n_labels:
            raise error.BadFormatError("EOF reached while reading labels "
                                       "from train-labels")
        self.original_labels[offs:offs + labels_count] = label_buf
        lowest = numpy.min(self.original_labels)
        if lowest != 0 or numpy.max(self.original_labels) != 9:
            raise error.BadFormatError(
                "Wrong labels range in train-labels.")

    # Images: magic 2051, count, rows, cols, then one byte per pixel.
    with open(images_fnme, "rb") as fin:
        magic, = struct.unpack(">i", fin.read(4))
        if magic != 2051:
            raise error.BadFormatError("Wrong header in train-images")
        n_images, = struct.unpack(">i", fin.read(4))
        if n_images != n_labels:
            raise error.BadFormatError("Wrong number of images in "
                                       "train-images")
        n_rows, n_cols = struct.unpack(">2i", fin.read(8))
        if n_rows != 28 or n_cols != 28:
            raise error.BadFormatError("Wrong images size in train-images,"
                                       " should be 28*28")
        n_pixels = n_images * n_rows * n_cols
        pixels = numpy.zeros(n_pixels, dtype=numpy.ubyte)
        n_read = fin.readinto(pixels)
        if n_read != n_pixels:
            raise error.BadFormatError("EOF reached while reading images "
                                       "from train-images")

    # The bytes are converted to float32 as they are; no scaling is
    # applied here.
    as_float = pixels.astype(numpy.float32)
    images = as_float.reshape(n_images, n_rows, n_cols)
    self.original_data.mem[offs:offs + n_images] = images
def open_file(file_name):
    """Opens an audio file for reading through libsndfile.

    :param file_name: path to the audio file.
    :return: dict with the keys "handle", "samples", "sampling_rate",
        "channels" and "info".
    :raises error.BadFormatError: if the file cannot be opened or has more
        than two channels.
    """
    info = SF_INFO()
    info.format = 0
    handle = libsndfile().sf_open(c_char_p(file_name.encode()),
                                  libsndfile.SFM_READ, byref(info))
    if not handle:
        raise error.BadFormatError(
            "Audio file %s does not exist or is in an unsupported format" %
            file_name)
    if info.channels > 2:
        # The file is already open at this point; release the handle
        # before rejecting it so it is not leaked.
        # NOTE(review): assumes the libsndfile wrapper exposes sf_close -
        # confirm.
        libsndfile().sf_close(handle)
        raise error.BadFormatError("Audio file " + file_name +
                                   " has more than two channels. "
                                   "Only mono or stereo are allowed.")
    return {
        "handle": handle,
        "samples": info.frames,
        "sampling_rate": info.samplerate,
        "channels": info.channels,
        "info": info,
    }
def load_data(self):
    """Collects the target keys and, with labels, maps labels to targets.

    Nothing is done after the parent load_data() when the loader was
    restored from a snapshot.

    :raises error.BadFormatError: on duplicate target keys, duplicate
        target labels, or (without labels) when the number of targets
        differs from the number of samples.
    """
    super(ImageLoaderMSEMixin, self).load_data()
    if self.restored_from_snapshot:
        return
    if len(self.target_keys) == 0:
        self.target_keys.extend(self.get_keys(TARGET))
    length = len(self.target_keys)
    if len(set(self.target_keys)) < length:
        raise error.BadFormatError("Some targets have duplicate keys")
    self.target_keys.sort()
    if not self.has_labels and length != self.total_samples:
        raise error.BadFormatError(
            "Number of class samples %d differs from the number of "
            "targets %d" % (self.total_samples, length))
    if self.has_labels:
        labels = [None] * length
        # The call fills "labels" and must run even when asserts are
        # stripped (python -O), so it is kept outside of the assert.
        loaded = self.load_target_keys(self.target_keys, None, labels)
        assert loaded
        if len(set(labels)) < length:
            raise error.BadFormatError("Targets have duplicate labels")
        self.target_label_map = dict(zip(labels, self.target_keys))
def load_data(self):
    """Collects the image keys of every class and detects labels.

    When the loader was restored from a pickle, the known keys are only
    checked for a changed effective size and nothing is rescanned.

    :raises error.BadFormatError: if an image changed its effective size
        or uncropped_shape is still empty after scanning the keys.
    """
    try:
        super(ImageLoader, self).load_data()
    except AttributeError:
        pass

    if self._restored_from_pickle_:
        self.info("Scanning for changes...")
        progress = ProgressBar(maxval=self.total_samples, term_width=40)
        progress.start()
        for class_keys in self.class_keys:
            for key in class_keys:
                progress.inc()
                actual_size, _ = self.get_effective_image_info(key)
                if actual_size != self.uncropped_shape:
                    raise error.BadFormatError(
                        "%s changed the effective size (now %s, was %s)" %
                        (key, actual_size, self.uncropped_shape))
        progress.finish()
        return

    for class_keys in self.class_keys:
        del class_keys[:]
    for index in range(len(CLASS_NAME)):
        found = set(self.get_keys(index))
        target = self.class_keys[index]
        target.extend(found)
        self.class_lengths[index] = len(found) * self.samples_inflation
        target.sort()

    if self.uncropped_shape == ():
        raise error.BadFormatError(
            "original_shape was not initialized in get_keys()")
    self.info(
        "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
        self.total_samples, self.shape, *self.class_lengths)

    # Quick (unreliable) label detection: probe a single random key of the
    # first non-empty class.
    non_empty = next(k for k in self.class_keys if len(k) > 0)
    probe = non_empty[RandomGenerator(None).randint(len(non_empty))]
    self._has_labels = self.load_keys((probe, ), None, None, None, None)
    self._resize_validation_keys(self.load_labels())
def _fill_array(self, filling_type, mem, stddev): if filling_type == "uniform": self.rand.fill(mem, -stddev, stddev) elif filling_type == "gaussian": self.rand.fill_normal_real(mem, 0, stddev) elif filling_type == "constant": mem[:] = stddev elif filling_type == "gabor": self._fill_with_gabor_filters(self.n_kernels, (self.ky, self.kx), stddev) else: raise error.BadFormatError("Invalid filling type: %s" % filling_type)
def squash_bars(self, x_inp, y_inp):
    """Reduces the (x, y) series to groups and appends them to the outputs.

    When there are more points than self.bars, consecutive points are
    grouped: the x values of a group are averaged and the y values are
    summed. Otherwise the points are appended unchanged. The results are
    appended to self.x_out / self.y_out, which are not cleared here.

    :param x_inp: sequence of x values.
    :param y_inp: sequence of y values of the same length.
    :return: tuple (self.x_out, self.y_out).
    :raises error.BadFormatError: if the lengths of x_inp and y_inp differ.
    """
    if len(x_inp) != len(y_inp):
        raise error.BadFormatError(
            "Shape of X %s not equal shape of Y %s !" %
            (len(x_inp), len(y_inp)))
    if len(x_inp) > self.bars:
        # NOTE(review): these "/" are integer divisions unless true
        # division is enabled for the module - confirm, since ceil/floor
        # only differ under true division.
        segm = int(numpy.ceil(len(x_inp) / self.bars))
        segm_min = int(numpy.floor(len(x_inp) / self.bars))
        # Number of trailing points not covered by whole groups of segm.
        residue = 0 if segm == segm_min else (
            len(x_inp) - segm * int(len(x_inp) / segm))
        if int(numpy.ceil(len(x_inp) / segm)) < self.bars:
            # Groups of segm points would yield fewer than self.bars
            # groups: switch to the smaller group size.
            self.inside_bar = True
            residue = len(x_inp) - segm_min * int(self.bars / segm_min)
        sum_x = 0
        y = 0
        if self.inside_bar:
            for i in range(0, len(x_inp) - residue, segm_min):
                sum_x, y = [
                    sum(arr[i:i + segm_min]) for arr in [x_inp, y_inp]
                ]
                x = sum_x / segm_min
                self.x_out.append(x)
                self.y_out.append(y)
            if residue:
                # The trailing points are folded into the last group.
                # NOTE(review): the nesting of the three statements after
                # "y = ..." was reconstructed from a flattened source -
                # confirm they belong inside this loop.
                for j in range(0, residue):
                    sum_x = (self.x_out[-1] +
                             x_inp[len(x_inp) - residue + j])
                    y = self.y_out[-1] + y_inp[len(x_inp) - residue + j]
                    x = sum_x / residue
                    self.x_out[-1] = x
                    self.y_out[-1] = y
        else:
            for i in range(0, len(x_inp) - residue, segm):
                sum_x, y = [sum(arr[i:i + segm]) for arr in [x_inp, y_inp]]
                x = sum_x / segm
                self.x_out.append(x)
                self.y_out.append(y)
            if residue:
                # The trailing points form one extra group of their own.
                sum_x = 0
                y = 0
                for j in range(0, residue):
                    sum_x += x_inp[len(x_inp) - residue + j]
                    y += y_inp[len(x_inp) - residue + j]
                x = sum_x / residue
                self.x_out.append(x)
                self.y_out.append(y)
    else:
        # Few enough points: copy them through unchanged.
        for i in range(0, len(x_inp)):
            self.x_out.append(x_inp[i])
            self.y_out.append(y_inp[i])
    return (self.x_out, self.y_out)
def load_data(self):
    """Loads the targets and associates them with the samples.

    Without target labels the targets are stored in sequence order.
    With labels, every sample receives the target whose label matches.

    :raises error.BadFormatError: if only one of targets / classes has
        labels, if target labels are duplicated or if some sample label
        has no target.
    """
    super(FullBatchImageLoaderMSEMixin, self).load_data()
    length = len(self.target_keys) * self.samples_inflation
    targets = numpy.zeros(
        (length,) + self.targets_shape, dtype=self.source_dtype)
    target_labels = [None] * length
    targets_labelled = self.load_target_keys(
        self.target_keys, targets, target_labels)

    if not targets_labelled:
        if self.has_labels:
            raise error.BadFormatError(
                "Targets do not have labels, but the classes do")
        # No labels anywhere: targets follow the samples' order.
        self.original_targets.mem = targets
        return
    if not self.has_labels:
        raise error.BadFormatError(
            "Targets have labels, but the classes do not")

    if len(set(target_labels)) < length / self.samples_inflation:
        raise error.BadFormatError("Some targets have duplicate labels")
    missing = set(self.original_labels).difference(target_labels)
    if missing:
        raise error.BadFormatError(
            "Labels %s do not have corresponding targets" % missing)

    self.original_targets.reset()
    class_targets_shape = (len(targets),) + targets[0].shape
    self.original_targets.mem = numpy.zeros(
        (len(self.original_labels),) + targets[0].shape,
        self.source_dtype)

    target_mapping = {}
    for i in range(length // self.samples_inflation):
        target_mapping[target_labels[i * self.samples_inflation]] = \
            targets[i]

    self.class_targets.reset(
        numpy.empty(class_targets_shape, self.source_dtype))
    for i, label in enumerate(target_labels):
        self.class_targets[i] = target_mapping[label]
    for i, label in enumerate(self.original_labels):
        self.original_targets[i] = target_mapping[label]
def initialize(self, device, **kwargs):
    """Limits reduce_size, registers the sources and prepares max_idx.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if the output sample size is not
        greater than 1.
    """
    sample_size = int(numpy.prod(self.output_sample_shape))
    self.reduce_size = min(self.reduce_size, sample_size)
    self.sources_["all2all/softmax"] = {"REDUCE_SIZE": self.reduce_size}
    super(All2AllSoftmax, self).initialize(device=device, **kwargs)

    per_sample = self.output.mem.size // self.output.mem.shape[0]
    if per_sample <= 1:
        raise error.BadFormatError(
            "Output sample size should be greater than 1 for SoftMax.")

    if not self.max_idx:
        self.max_idx.reset(
            numpy.zeros(self.output.shape[0], dtype=numpy.int32))
    self.max_idx.initialize(self.device)
def initialize(self, device, **kwargs):
    """Derives the geometry attributes and validates weights and bias.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if the weights, bias or input sizes do
        not match the computed geometry.
    """
    super(GradientDescentConv, self).initialize(device=device, **kwargs)

    self._batch_size = self.input.shape[0]
    self._sy = self.input.shape[1]
    self._sx = self.input.shape[2]
    image_area = self._batch_size * self._sx * self._sy
    self._n_channels = self.input.size // image_area
    self._kernel_size = self.kx * self.ky * self._n_channels
    self._dtype = self.err_output.dtype

    # Number of kernel applications along x and y.
    padded_sx = self._sx - self.kx + self.padding[0] + self.padding[2]
    padded_sy = self._sy - self.ky + self.padding[1] + self.padding[3]
    self._kx_app = 1 + (padded_sx // self.sliding[0])
    self._ky_app = 1 + (padded_sy // self.sliding[1])
    self._kernel_app_per_image = self._kx_app * self._ky_app
    self._kernel_app_total = self._batch_size * self._kernel_app_per_image

    self.cl_const = numpy.zeros(9, dtype=self._dtype)

    self._side = self.weights_shape[0]
    self._other = self.weights.size // self._side
    assert self._side == self.n_kernels
    assert self._other == self.kx * self.ky * self._n_channels

    expected_weights = (self.n_kernels * self.kx * self.ky *
                        self._n_channels)
    if self.weights.size != expected_weights:
        raise error.BadFormatError("Expected number of weights to match "
                                   "input, n_kernels, kx, ky parameters")
    if self.include_bias and self.bias.size != self.n_kernels:
        raise error.BadFormatError("Expected bias to match n_kernels")
    expected_input = (self._batch_size * self._sy * self._sx *
                      self._n_channels)
    if self.input.size != expected_input:
        raise error.BadFormatError("Expected input size to match "
                                   "batch_size * sy * sx * n_channels")
def get_image_info(self, key):
    """
    :param key: The full path to the analysed image.
    :return: tuple (image size, color space); the size is the reversed
        PIL size, or the first two dimensions of the OpenCV array.
    """
    try:
        with open(key, "rb") as fin:
            img = Image.open(fin)
            size = tuple(reversed(img.size))
            color_space = MODE_COLOR_MAP[img.mode]
            return size, color_space
    except Exception as e:
        self.warning("Failed to read %s with PIL: %s", key, e)
        # PIL could not handle the file: use the slower OpenCV path,
        # which reads the whole image.
        img = cv2.imread(key, cv2.IMREAD_UNCHANGED)
        if img is None:
            raise error.BadFormatError("Unable to read %s" % key)
        return img.shape[:2], "BGR"
def layers(self, value):
    """Validates and stores the layers description.

    :param value: list of dicts describing the layers; [{}] is treated as
        "not set".
    :raises ValueError: if both mcdnnic_topology and layers are set, or
        value is not a list of dicts.
    :raises error.BadFormatError: if layers is empty while neither
        mcdnnic_topology nor preprocessing is set.
    """
    if self.mcdnnic_topology is not None and value != [{}]:
        raise ValueError(
            "Please do not set mcdnnic_topology and layers at the same "
            "time.")
    if not isinstance(value, list):
        raise ValueError("layers should be a list of dicts")
    if (value == [{}] and self.mcdnnic_topology is None and
            not self.preprocessing):
        # The adjacent literals previously ran words together
        # ("ormcdnnic_topology", "NeuralNetworks"); spaces were added.
        raise error.BadFormatError(
            "Looks like layers is empty and mcdnnic_topology is not "
            "defined. Please set layers like in VELES samples or "
            "mcdnnic_topology like in article 'Multi-column Deep Neural "
            "Networks for Image Classification' "
            "(http://papers.nips.cc/paper/4824-imagenet-classification-wi"
            "th-deep-convolutional-neural-networks)")
    for layer in value:
        if not isinstance(layer, dict):
            raise ValueError(
                "layers should be a list of dicts")
    self._layers = value
def background_image(self, value):
    """Sets the background image from a path, file object, array or None.

    A file name is opened and re-assigned as a file object; a file object
    is decoded with PIL and re-assigned as an array; an array is stored
    after its shape has been checked against self.shape.

    :raises error.BadFormatError: if the array shape differs from
        self.shape.
    :raises ValueError: if value is of any other type.
    """
    if isinstance(value, str):
        with open(value, "rb") as fin:
            self.background_image = fin
        return
    if hasattr(value, "read") and hasattr(value, "seek"):
        self.background_image = numpy.array(Image.open(value))
        return
    if isinstance(value, numpy.ndarray):
        if value.shape != self.shape:
            raise error.BadFormatError(
                "background_image's shape %s != sample's shape "
                "%s" % (value.shape, self.shape))
        self._background_image = value
        if getattr(self, "background_color", None) is not None:
            self.warning(
                "background_color = %s is ignored in favor of "
                "background_image", self.background_color)
        return
    if value is None:
        self._background_image = None
        return
    raise ValueError("background_image must be any of the following: "
                     "file name, file object, numpy array or None")
def initialize(self, device, **kwargs):
    """Prepares the metrics, mse and n_err vectors of the evaluator.

    Nothing is done after the parent initialize() in testing mode.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if target and output sizes differ.
    """
    super(EvaluatorMSE, self).initialize(device=device, **kwargs)
    if self.testing:
        return

    if self.target.size != self.output.size:
        sizes = (self.target.size, self.output.size)
        raise error.BadFormatError(
            "target.size != output.size (%s != %s)" % sizes)

    for source_name in ("evaluator_mse", "denormalization"):
        self.sources_[source_name] = {}

    dtype = self.output.dtype
    self.metrics.reset(numpy.zeros(3, dtype=dtype))
    self.metrics[2] = 1.0e30  # mse_min
    n_samples = self.err_output.mem.shape[0]
    self.mse.reset(numpy.zeros(n_samples, dtype))
    self.n_err.reset(numpy.zeros(2, dtype=numpy.int32))
    self.init_vectors(self.n_err, self.target, self.metrics, self.mse)

    if self.class_targets:
        self.class_targets.initialize(self.device)
def load_data(self):
    """Allocates the full-batch arrays and fills them with the images.

    :raises MemoryError: if the available memory is less than the size of
        all samples in source_dtype.
    :raises error.BadFormatError: if only some of the classes have labels.
    """
    super(FullBatchImageLoader, self).load_data()

    # Allocate data
    required_mem = self.total_samples * numpy.prod(self.shape) * \
        numpy.dtype(self.source_dtype).itemsize
    # Query once and report the same quantity that is compared; the
    # message used to print "free" while the check used "available".
    available_mem = virtual_memory().available
    if available_mem < required_mem:
        gb = 1.0 / (1000 * 1000 * 1000)
        self.critical(
            "Not enough memory (available %.3f Gb, required %.3f Gb)",
            available_mem * gb, required_mem * gb)
        raise MemoryError("Not enough memory")
    # Real allocation will still happen during the second pass
    self.create_originals(self.shape)
    self.original_label_values.mem = numpy.zeros(
        self.total_samples, numpy.float32)

    has_labels = self._fill_original_data()

    # A zero product with a non-zero sum means a mix of classes with and
    # without labels.
    if numpy.prod(has_labels) == 0 and sum(has_labels) > 0:
        raise error.BadFormatError(
            "Some classes do not have labels while other do")
    # Drop the labels storage if no labels were extracted at all.
    if sum(has_labels) == 0:
        del self.original_labels[:]
def get_image_data(self, key):
    """
    Reads the image stored at key into an array of source_dtype.

    PIL is tried first ("P" and "CMYK" images are converted to "RGB");
    if it fails, the image is read with OpenCV instead.

    :param key: path to the image file.
    :return: :class:`numpy.ndarray` with the image data.
    """
    try:
        with open(key, "rb") as fin:
            img = Image.open(fin)
            if img.mode in ("P", "CMYK"):
                img = img.convert("RGB")
            return numpy.array(img, dtype=self.source_dtype)
    except (TypeError, KeyboardInterrupt) as e:
        raise from_none(e)
    except Exception as e:
        self.warning("Failed to read %s with PIL: %s", key, e)
        fallback = cv2.imread(key)
        if fallback is None:
            raise error.BadFormatError("Unable to read %s" % key)
        return fallback.astype(self.source_dtype)
def initialize(self, device, **kwargs):
    """Refuses in-place operation before the parent initialization.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if output is the same object as input
        or eq_addr() reports that their memory is the same.
    """
    same_object = self.output is self.input
    if not same_object:
        shares_memory = (self.output is not None and
                         self.output.mem is not None and
                         eq_addr(self.output.mem, self.input.mem))
    if same_object or shares_memory:
        raise error.BadFormatError("in_place for this unit is prohibited")
    super(ForwardSinCos, self).initialize(device=device, **kwargs)
def initialize(self, device, **kwargs):
    """Requires the input to be set before the parent initialization.

    :param device: device passed through to the parent initialize().
    :raises error.BadFormatError: if input is not set.
    """
    input_is_set = bool(self.input)
    if not input_is_set:
        raise error.BadFormatError(
            "input should be set and should not be equal to output")
    super(BackwardSinCos, self).initialize(device=device, **kwargs)