def load_data(self):
    """Loads the three pickle sets, validates that all sample shapes match
    the TRAIN set, then copies data and labels into the original arrays.
    """
    progress = ProgressBar(maxval=sum(len(p) for p in self._pickles),
                           term_width=40)
    self.info("Loading %d pickles...", progress.maxval)
    progress.start()
    pickled = [self.load_pickles(idx, self._pickles[idx], progress)
               for idx in range(3)]
    progress.finish()
    self.info("Initializing the arrays...")
    # The TRAIN set (index 2) defines the reference sample shape.
    train_shape = pickled[2][1][0].shape[1:]
    for idx in range(2):
        if pickled[idx][0] <= 0:
            continue
        other_shape = pickled[idx][1][0].shape[1:]
        if other_shape != train_shape:
            raise error.BadFormatError(
                "TRAIN and %s sets have the different sample shape "
                "(%s vs %s)" % (CLASS_NAME[idx], train_shape, other_shape))
    self.create_originals(self.reshape(train_shape))
    data_offset = labels_offset = 0
    for entry in pickled:
        if entry[0] == 0:
            continue
        for chunk in entry[1]:
            end = data_offset + chunk.shape[0]
            self.original_data[data_offset:end] = self.transform_data(chunk)
            data_offset = end
        for chunk in entry[2]:
            end = labels_offset + chunk.shape[0]
            self.original_labels[labels_offset:end] = chunk
            labels_offset = end
def load_data(self):
    """Loads all three pickle sets, checks shape consistency against TRAIN,
    and fills the original data/label arrays sequentially.
    """
    bar = ProgressBar(maxval=sum(len(p) for p in self._pickles),
                      term_width=40)
    self.info("Loading %d pickles...", bar.maxval)
    bar.start()
    results = []
    for index in range(3):
        results.append(self.load_pickles(index, self._pickles[index], bar))
    bar.finish()
    self.info("Initializing the arrays...")
    # TRAIN (index 2) supplies the canonical per-sample shape.
    sample_shape = results[2][1][0].shape[1:]
    for index in range(2):
        if results[index][0] > 0:
            candidate = results[index][1][0].shape[1:]
            if candidate != sample_shape:
                raise error.BadFormatError(
                    "TRAIN and %s sets have the different sample shape "
                    "(%s vs %s)" % (CLASS_NAME[index], sample_shape,
                                    candidate))
    self.create_originals(self.reshape(sample_shape))
    offsets = [0, 0]
    for index in range(3):
        if results[index][0] == 0:
            continue
        for block in results[index][1]:
            stop = offsets[0] + block.shape[0]
            self.original_data[offsets[0]:stop] = self.transform_data(block)
            offsets[0] = stop
        for block in results[index][2]:
            stop = offsets[1] + block.shape[0]
            self.original_labels[offsets[1]:stop] = block
            offsets[1] = stop
def _fill_original_data(self):
    """Loads every class's samples into the original arrays.

    Returns the per-class list of "has labels" flags reported by the
    underlying key loaders.
    """
    bar = ProgressBar(
        term_width=50,
        maxval=self.total_samples * self.samples_inflation,
        widgets=["Loading %dx%d images " % (self.total_samples,
                                            self.crop_number),
                 Bar(), " ", Percentage()],
        log_level=logging.INFO, poll=0.5)
    bar.start()
    position = 0
    labels_present = []
    mem_data = self.original_data.mem
    mem_label_values = self.original_label_values.mem
    for class_keys in self.class_keys:
        if not class_keys:
            continue
        if self.samples_inflation != 1:
            # Each key yields samples_inflation distorted samples.
            class_labels = ([None] * len(class_keys) *
                            self.samples_inflation)
            position, found = self._load_distorted_keys(
                class_keys, mem_data, class_labels, mem_label_values,
                position, bar)
            labels_present.append(found)
        else:
            class_labels = [None] * len(class_keys)
            labels_present.append(self.load_keys(
                class_keys, bar, mem_data[position:], class_labels,
                mem_label_values[position:]))
            position += len(class_keys)
        self.original_labels[position - len(class_labels):position] = \
            class_labels
    bar.finish()
    return labels_present
def _fill_original_data(self):
    """Fills original_data/original_labels with every class's samples and
    returns the per-class "has labels" flags.
    """
    pb = ProgressBar(
        term_width=50,
        maxval=self.total_samples * self.samples_inflation,
        widgets=["Loading %dx%d images " % (self.total_samples,
                                            self.crop_number),
                 Bar(), ' ', Percentage()],
        log_level=logging.INFO, poll=0.5)
    pb.start()
    cursor = 0
    flags = []
    raw = self.original_data.mem
    raw_label_values = self.original_label_values.mem
    for group in self.class_keys:
        if len(group) == 0:
            continue
        if self.samples_inflation == 1:
            group_labels = [None] * len(group)
            flags.append(self.load_keys(
                group, pb, raw[cursor:], group_labels,
                raw_label_values[cursor:]))
            cursor += len(group)
        else:
            # Inflated loading: each key produces several distorted copies.
            group_labels = [None] * len(group) * self.samples_inflation
            cursor, has = self._load_distorted_keys(
                group, raw, group_labels, raw_label_values, cursor, pb)
            flags.append(has)
        self.original_labels[cursor - len(group_labels):cursor] = \
            group_labels
    pb.finish()
    return flags
def initialize(self, **kwargs):
    """Initializes all the units belonging to this Workflow,
    in dependency order.

    Keyword Args:
        snapshot (bool): mandatory; when True, closes gates of units that
            do not remember their gate state.

    Raises:
        KeyError: if "snapshot" is missing from kwargs.
    """
    # Local import: the module's import block is outside this view.
    from collections import deque
    try:
        snapshot = kwargs["snapshot"]
    except KeyError:
        raise from_none(KeyError(
            "\"snapshot\" (True/False) must be provided in kwargs"))
    units_number = len(self)
    fin_text = "%d units were initialized" % units_number
    # fin_text participates in maxlen so the final message fits the bar.
    maxlen = max([len(u.name) for u in self] + [len(fin_text)])
    if not self.is_standalone:
        self.verify_interface(IDistributable)
    progress = ProgressBar(
        maxval=units_number,
        term_width=min(80, len(self) + 8 + maxlen),
        widgets=[Percentage(), ' ', Bar(), ' ', ' ' * maxlen], poll=0)
    progress.widgets[0].TIME_SENSITIVE = True
    self.info("Initializing units in %s...", self.name)
    progress.start()
    units_in_dependency_order = list(self.units_in_dependency_order)
    # Partially initialized units are re-queued at the tail and popped from
    # the head, so use deque.popleft() (O(1)) instead of list.pop(0) (O(n)).
    iqueue = deque(units_in_dependency_order)
    while iqueue:
        unit = iqueue.popleft()
        # Early abort in case of KeyboardInterrupt
        if self.thread_pool.joined:
            break
        progress.widgets[-1] = unit.name + ' ' * (maxlen - len(unit.name))
        progress.update()
        if not self.is_standalone:
            unit.verify_interface(IDistributable)
        try:
            partially = unit.initialize(**kwargs)
        except BaseException:  # was a bare "except:"; log, then re-raise
            self.error("Unit \"%s\" failed to initialize", unit.name)
            raise
        if partially:
            # Unit asked to be revisited once its dependencies settle.
            iqueue.append(unit)
        else:
            if snapshot and not unit._remembers_gates:
                unit.close_gate()
                unit.close_upstream()
            progress.inc()
    progress.widgets[-1] = fin_text + ' ' * (maxlen - len(fin_text))
    progress.finish()
    initialized_units_number = len(units_in_dependency_order)
    if initialized_units_number < units_number:
        self.warning("Not all units were initialized (%d left): %s",
                     units_number - initialized_units_number,
                     set(self) - set(units_in_dependency_order))
def initialize(self, **kwargs):
    """Initializes all the units belonging to this Workflow,
    in dependency order.
    """
    try:
        snapshot = kwargs["snapshot"]
    except KeyError:
        raise from_none(KeyError(
            "\"snapshot\" (True/False) must be provided in kwargs"))
    total = len(self)
    done_text = "%d units were initialized" % total
    name_width = max([len(u.name) for u in self] + [len(done_text)])
    if not self.is_standalone:
        self.verify_interface(IDistributable)
    bar = ProgressBar(
        maxval=total, term_width=min(80, len(self) + 8 + name_width),
        widgets=[Percentage(), ' ', Bar(), ' ', ' ' * name_width], poll=0)
    bar.widgets[0].TIME_SENSITIVE = True
    self.info("Initializing units in %s...", self.name)
    bar.start()
    ordered = list(self.units_in_dependency_order)
    pending = list(ordered)
    while pending:
        unit = pending.pop(0)
        # Early abort in case of KeyboardInterrupt
        if self.thread_pool.joined:
            break
        bar.widgets[-1] = unit.name + ' ' * (name_width - len(unit.name))
        bar.update()
        if not self.is_standalone:
            unit.verify_interface(IDistributable)
        try:
            partially = unit.initialize(**kwargs)
        except:
            self.error("Unit \"%s\" failed to initialize", unit.name)
            raise
        if not partially:
            if snapshot and not unit._remembers_gates:
                unit.close_gate()
                unit.close_upstream()
            bar.inc()
        else:
            # Revisit the unit after the rest of the queue.
            pending.append(unit)
    bar.widgets[-1] = done_text + ' ' * (name_width - len(done_text))
    bar.finish()
    if len(ordered) < total:
        self.warning("Not all units were initialized (%d left): %s",
                     total - len(ordered), set(self) - set(ordered))
def load_labels(self):
    """Reads the label of every sample in every class.

    Returns a pair of 3-tuples (one entry per class): label -> count maps
    and label -> key-list maps, or None when the dataset has no labels.
    """
    if not self.has_labels:
        return
    self.info("Reading labels...")
    counts = tuple(defaultdict(int) for _ in range(3))
    key_maps = tuple(defaultdict(list) for _ in range(3))
    bar = ProgressBar(maxval=self.total_samples, term_width=40)
    bar.start()
    for class_index in range(3):
        count_map = counts[class_index]
        key_map = key_maps[class_index]
        for key in self.class_keys[class_index]:
            label, labeled = self._load_label(key, True)
            assert labeled
            count_map[label] += 1
            key_map[label].append(key)
            self._samples_mapping[label].add(key)
            bar.inc()
    bar.finish()
    return counts, key_maps
def load_data(self):
    """Discovers image keys per class, or — when restored from a pickle —
    verifies that the images on disk still have the recorded size.
    """
    try:
        super(ImageLoader, self).load_data()
    except AttributeError:
        pass
    if self._restored_from_pickle_:
        self.info("Scanning for changes...")
        bar = ProgressBar(maxval=self.total_samples, term_width=40)
        bar.start()
        for class_keys in self.class_keys:
            for key in class_keys:
                bar.inc()
                effective_size, _ = self.get_effective_image_info(key)
                if effective_size == self.uncropped_shape:
                    continue
                raise error.BadFormatError(
                    "%s changed the effective size (now %s, was %s)" %
                    (key, effective_size, self.uncropped_shape))
        bar.finish()
        return
    for class_keys in self.class_keys:
        del class_keys[:]
    for index, class_name in enumerate(CLASS_NAME):
        unique_keys = set(self.get_keys(index))
        self.class_keys[index].extend(unique_keys)
        self.class_lengths[index] = (len(unique_keys) *
                                     self.samples_inflation)
        self.class_keys[index].sort()
    if self.uncropped_shape == tuple():
        raise error.BadFormatError(
            "original_shape was not initialized in get_keys()")
    self.info(
        "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
        self.total_samples, self.shape, *self.class_lengths)
    # Perform a quick (unreliable) test to determine if we have labels
    nonempty = next(k for k in self.class_keys if len(k) > 0)
    probe = nonempty[RandomGenerator(None).randint(len(nonempty))]
    self._has_labels = self.load_keys((probe,), None, None, None, None)
    self._resize_validation_keys(self.load_labels())
def load_data(self):
    """Builds the per-class key lists from disk, or re-validates image
    sizes when the loader was restored from a pickle.
    """
    try:
        super(ImageLoader, self).load_data()
    except AttributeError:
        pass
    if self._restored_from_pickle_:
        self.info("Scanning for changes...")
        pb = ProgressBar(maxval=self.total_samples, term_width=40)
        pb.start()
        for group in self.class_keys:
            for item in group:
                pb.inc()
                current_size, _ = self.get_effective_image_info(item)
                if current_size != self.uncropped_shape:
                    raise error.BadFormatError(
                        "%s changed the effective size (now %s, was %s)" %
                        (item, current_size, self.uncropped_shape))
        pb.finish()
        return
    for group in self.class_keys:
        del group[:]
    for idx, class_name in enumerate(CLASS_NAME):
        found = set(self.get_keys(idx))
        self.class_keys[idx].extend(found)
        self.class_lengths[idx] = len(found) * self.samples_inflation
        self.class_keys[idx].sort()
    if self.uncropped_shape == tuple():
        raise error.BadFormatError(
            "original_shape was not initialized in get_keys()")
    self.info(
        "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
        self.total_samples, self.shape, *self.class_lengths)
    # Perform a quick (unreliable) test to determine if we have labels
    sample_keys = next(k for k in self.class_keys if len(k) > 0)
    self._has_labels = self.load_keys(
        (sample_keys[RandomGenerator(None).randint(len(sample_keys))],),
        None, None, None, None)
    self._resize_validation_keys(self.load_labels())
def render(self, info):
    """Renders the report: Markdown always, plus an HTML page and image
    files when self.html is set and self.file is a path.

    Returns the Markdown text when HTML output is disabled or no file is
    configured, otherwise the rendered HTML string.
    """
    # Expose the image format to the base renderer for the duration of
    # the super() call only.
    info["imgformat"] = self.image_format
    content = super(MarkdownBackend, self).render(info)
    del info["imgformat"]
    if self.file is None:
        # No output destination: just hand back the Markdown text.
        return content
    if isinstance(self.file, string_types):
        # A path was given: open it ourselves (UTF-8, entity-escaping
        # unencodable characters).
        file = codecs.open(self.file, mode="w", encoding="utf-8",
                           errors="xmlcharrefreplace")
    else:
        # Assume a writable file-like object was passed in.
        file = self.file
    if not self.html:
        # Markdown-only mode: write and return the raw Markdown.
        # NOTE(review): "with file" closes a caller-supplied file object
        # too — presumably intentional; confirm with callers.
        with file:
            file.write(content)
        return content
    with file:
        self.info("Generating HTML...")
        html = self._html_template.render(
            github_css=self.use_github_css,
            imgformat=self.image_format,
            markdown=markdown.markdown(content, extensions=(
                "markdown.extensions.smarty",
                "markdown.extensions.tables",
                "markdown.extensions.codehilite",
                "markdown.extensions.admonition",
                "gfm"),
                extension_configs={"markdown.extensions.codehilite": {
                    "guess_lang": False}},
                output_format="html5"),
            **info)
        file.write(html)
    if self.use_github_css:
        self.debug("Linked with GitHub CSS file")
    if not isinstance(self.file, string_types):
        # File-like destination: image assets cannot be placed next to it.
        return html
    basedir = os.path.dirname(self.file)
    fn = os.path.join(basedir, "github-markdown.css")
    if not os.path.exists(fn):
        # Fetch the stylesheet once; cached on disk afterwards.
        self.info("Downloading github-markdown-css...")
        wget.download(
            "https://github.com/sindresorhus/github-markdown-css/raw/"
            "gh-pages/github-markdown.css", out=fn)
        # wget.download leaves the cursor mid-line; terminate it.
        print()
    self.info("Saving images...")
    # 2 fixed images (workflow graph + report image) plus one per plot.
    progress = ProgressBar(2 + len(info["plots"]))
    progress.term_width = progress.maxval + 7
    progress.start()
    fn = os.path.join(basedir, "workflow.%s" % self.image_format)
    with open(fn, "wb") as fout:
        fout.write(info["workflow_graph"][self.image_format])
    progress.inc()
    self.debug("Saved %s", fn)
    # assumes info["image"] maps "name"/"data" — established by the
    # subscripts below, schema otherwise not visible here.
    fn = os.path.join(basedir, info["image"]["name"])
    with open(fn, "wb") as fout:
        fout.write(info["image"]["data"])
    progress.inc()
    self.debug("Saved %s", fn)
    for key, data in info["plots"].items():
        fn = os.path.join(basedir, "%s.%s" % (key, self.image_format))
        with open(fn, "wb") as fout:
            fout.write(data[self.image_format])
        progress.inc()
        self.debug("Saved %s", fn)
    progress.finish()
    self.info("%s is ready", self.file)
    return html