Example #1
 def _fill_original_data(self):
     pbar = ProgressBar(
         term_width=50,
         maxval=self.total_samples * self.samples_inflation,
         widgets=["Loading %dx%d images " % (self.total_samples, self.crop_number), Bar(), " ", Percentage()],
         log_level=logging.INFO,
         poll=0.5,
     )
     pbar.start()
     offset = 0
     has_labels = []
     data = self.original_data.mem
     label_values = self.original_label_values.mem
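     # Fill data / labels / label_values class by class; each non-empty class
     # contributes a has-labels flag, and samples_inflation > 1 means every
     # key is loaded as several distorted copies of the same sample.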
     for keys in self.class_keys:
         if len(keys) == 0:
             continue
         if self.samples_inflation == 1:
             labels = [None] * len(keys)
             has_labels.append(self.load_keys(keys, pbar, data[offset:], labels, label_values[offset:]))
             offset += len(keys)
         else:
             labels = [None] * len(keys) * self.samples_inflation
             offset, hl = self._load_distorted_keys(keys, data, labels, label_values, offset, pbar)
             has_labels.append(hl)
         self.original_labels[offset - len(labels) : offset] = labels
     pbar.finish()
     return has_labels
Example #2
 def load_data(self):
     pbar = ProgressBar(maxval=sum(len(p) for p in self._pickles),
                        term_width=40)
     self.info("Loading %d pickles...", pbar.maxval)
     pbar.start()
     loaded = [self.load_pickles(i, self._pickles[i], pbar)
               for i in range(3)]
     pbar.finish()
     self.info("Initializing the arrays...")
     shape = loaded[2][1][0].shape[1:]
     for i in range(2):
         if loaded[i][0] > 0:
             shi = loaded[i][1][0].shape[1:]
             if shape != shi:
                 raise error.BadFormatError(
                     "TRAIN and %s sets have the different sample shape "
                     "(%s vs %s)" % (CLASS_NAME[i], shape, shi))
     self.create_originals(self.reshape(shape))
     offsets = [0, 0]
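     # Copy every array into the flat original_data / original_labels buffers,
     # keeping separate write offsets for data and labels.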
     for ds in range(3):
         if loaded[ds][0] == 0:
             continue
         for arr in loaded[ds][1]:
             self.original_data[offsets[0]:(offsets[0] + arr.shape[0])] = \
                 self.transform_data(arr)
             offsets[0] += arr.shape[0]
         for arr in loaded[ds][2]:
             self.original_labels[offsets[1]:(offsets[1] + arr.shape[0])] =\
                 arr
             offsets[1] += arr.shape[0]
Example #3
 def _iterate_class(self, class_index, fn):
     size = int(
         numpy.ceil(self.class_lengths[class_index] /
                    self.max_minibatch_size))
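     # Split the class into minibatches of at most max_minibatch_size samples,
     # expose each one through minibatch_indices and invoke the callback.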
     for i in ProgressBar(term_width=40)(range(size)):
         start_index = i * self.max_minibatch_size
         self.minibatch_size = min(
             self.max_minibatch_size,
             self.class_lengths[class_index] - start_index)
         offset = self.class_end_offsets[class_index - 1] + start_index
         self.minibatch_indices[:self.minibatch_size] = \
             self.shuffled_indices[offset:offset + self.minibatch_size]
         self.fill_minibatch()
         fn()
Example #4
    def load_labels(self):
        if not self.has_labels:
            return
        self.info("Reading labels...")
        different_labels = defaultdict(int), defaultdict(int), defaultdict(int)
        label_key_map = defaultdict(list), defaultdict(list), defaultdict(list)
        pb = ProgressBar(maxval=self.total_samples, term_width=40)
        pb.start()
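        # Count how often each label occurs in every class and remember which
        # keys carry it.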
        for class_index in range(3):
            for key in self.class_keys[class_index]:
                label, has_labels = self._load_label(key, True)
                assert has_labels
                different_labels[class_index][label] += 1
                label_key_map[class_index][label].append(key)
                self._samples_mapping[label].add(key)
                pb.inc()
        pb.finish()

        return different_labels, label_key_map
Example #5
 def setup_logging(level):
     if Logger.SET_UP:
         raise Logger.LoggerHasBeenAlreadySetUp()
     Logger.SET_UP = True
     Logger.ensure_utf8_streams()
     # Set basic log level
     logging.basicConfig(level=level, stream=sys.stdout)
     # Override the global log level and the output stream in case they have
     # already been changed elsewhere
     root_logger = logging.getLogger()
     root_logger.level = level
     root_logger.handlers[0].stream = sys.stdout
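     # Keep the shared ProgressBar logger at the same level as the root logger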
     ProgressBar().logger.level = level
     # Turn on colors when the output is an interactive tty or IPython
     if has_colors():
         root = logging.getLogger()
         handler = root.handlers[0]
         handler.setFormatter(Logger.ColorFormatter())
Example #6
    def load_data(self):
        try:
            super(ImageLoader, self).load_data()
        except AttributeError:
            pass
        if self._restored_from_pickle_:
            self.info("Scanning for changes...")
            progress = ProgressBar(maxval=self.total_samples, term_width=40)
            progress.start()
            for keys in self.class_keys:
                for key in keys:
                    progress.inc()
                    size, _ = self.get_effective_image_info(key)
                    if size != self.uncropped_shape:
                        raise error.BadFormatError(
                            "%s changed the effective size (now %s, was %s)" %
                            (key, size, self.uncropped_shape))
            progress.finish()
            return
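        # Fresh run: rebuild the key lists for TEST, VALIDATION and TRAIN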
        for keys in self.class_keys:
            del keys[:]
        for index, class_name in enumerate(CLASS_NAME):
            keys = set(self.get_keys(index))
            self.class_keys[index].extend(keys)
            self.class_lengths[index] = len(keys) * self.samples_inflation
            self.class_keys[index].sort()

        if self.uncropped_shape == tuple():
            raise error.BadFormatError(
                "original_shape was not initialized in get_keys()")
        self.info(
            "Found %d samples of shape %s (%d TEST, %d VALIDATION, %d TRAIN)",
            self.total_samples, self.shape, *self.class_lengths)

        # Perform a quick (unreliable) test to determine if we have labels
        keys = next(k for k in self.class_keys if len(k) > 0)
        self._has_labels = self.load_keys(
            (keys[RandomGenerator(None).randint(len(keys))], ), None, None,
            None, None)
        self._resize_validation_keys(self.load_labels())
Example #7
 def initialize(self, **kwargs):
     """Initializes all the units belonging to this Workflow, in dependency
     order.
     """
     units_number = len(self)
     fin_text = "%d units were initialized" % units_number
     maxlen = max([len(u.name) for u in self] + [len(fin_text)])
     if not self.is_standalone:
         self.verify_interface(IDistributable)
     progress = ProgressBar(maxval=units_number,
                            term_width=min(80, len(self) + 8 + maxlen),
                            widgets=[Percentage(), ' ', Bar(), ' ',
                                     ' ' * maxlen], poll=0)
     progress.widgets[0].TIME_SENSITIVE = True
     self.info("Initializing units in %s...", self.name)
     progress.start()
     units_in_dependency_order = list(self.units_in_dependency_order)
     iqueue = list(units_in_dependency_order)
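     # Walk the units in dependency order; a unit that reports partial
     # initialization is re-queued and retried after the others.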
     while len(iqueue) > 0:
         unit = iqueue.pop(0)
         # Early abort in case of KeyboardInterrupt
         if self.thread_pool.joined:
             break
         progress.widgets[-1] = unit.name + ' ' * (maxlen - len(unit.name))
         progress.update()
         if not self.is_standalone:
             unit.verify_interface(IDistributable)
         try:
             partially = unit.initialize(**kwargs)
         except:
             self.error("Unit \"%s\" failed to initialize", unit.name)
             raise
         if partially:
             iqueue.append(unit)
         else:
             if self.restored_from_snapshot and not unit._remembers_gates:
                 unit.close_gate()
                 unit.close_upstream()
             progress.inc()
     progress.widgets[-1] = fin_text + ' ' * (maxlen - len(fin_text))
     progress.finish()
     initialized_units_number = len(units_in_dependency_order)
     if initialized_units_number < units_number:
         self.warning("Not all units were initialized (%d left): %s",
                      units_number - initialized_units_number,
                      set(self) - set(units_in_dependency_order))
     self._restored_from_snapshot_ = None
Example #8
 def render(self, info):
     info["imgformat"] = self.image_format
     content = super(MarkdownBackend, self).render(info)
     del info["imgformat"]
     if self.file is None:
         return content
     if isinstance(self.file, string_types):
         file = codecs.open(self.file, mode="w", encoding="utf-8",
                            errors="xmlcharrefreplace")
     else:
         file = self.file
     if not self.html:
         with file:
             file.write(content)
         return content
     with file:
         self.info("Generating HTML...")
         html = self._html_template.render(
             github_css=self.use_github_css, imgformat=self.image_format,
             markdown=markdown.markdown(content, extensions=(
                 "markdown.extensions.smarty", "markdown.extensions.tables",
                 "markdown.extensions.codehilite",
                 "markdown.extensions.admonition", "gfm"),
                 extension_configs={"markdown.extensions.codehilite": {
                     "guess_lang": False}},
                 output_format="html5"),
             **info)
         file.write(html)
     if self.use_github_css:
         self.debug("Linked with GitHub CSS file")
     if not isinstance(self.file, string_types):
         return html
     basedir = os.path.dirname(self.file)
     fn = os.path.join(basedir, "github-markdown.css")
     if not os.path.exists(fn):
         self.info("Downloading github-markdown-css...")
         wget.download(
             "https://github.com/sindresorhus/github-markdown-css/raw/"
             "gh-pages/github-markdown.css", out=fn)
         print()
     self.info("Saving images...")
     progress = ProgressBar(2 + len(info["plots"]))
     progress.term_width = progress.maxval + 7
     progress.start()
     fn = os.path.join(basedir, "workflow.%s" % self.image_format)
     with open(fn, "wb") as fout:
         fout.write(info["workflow_graph"][self.image_format])
     progress.inc()
     self.debug("Saved %s", fn)
     fn = os.path.join(basedir, info["image"]["name"])
     with open(fn, "wb") as fout:
         fout.write(info["image"]["data"])
     progress.inc()
     self.debug("Saved %s", fn)
     for key, data in info["plots"].items():
         fn = os.path.join(basedir, "%s.%s" % (key, self.image_format))
         with open(fn, "wb") as fout:
             fout.write(data[self.image_format])
         progress.inc()
         self.debug("Saved %s", fn)
     progress.finish()
     self.info("%s is ready", self.file)
     return html
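All of the snippets above follow the same ProgressBar lifecycle: construct the bar with maxval, term_width and optionally a widgets list, call start(), advance it inside the loop, and call finish(). They rely on a project-specific ProgressBar subclass that adds extras such as inc(), a logger attribute and the log_level/poll keyword arguments. Below is a minimal sketch of that lifecycle assuming only the plain progressbar package API (update(value) instead of inc()); the item list is made up for illustration.

    # Minimal illustrative sketch of the ProgressBar lifecycle shared by the
    # examples above, using only the plain progressbar API.
    from progressbar import Bar, Percentage, ProgressBar

    items = ["a.png", "b.png", "c.png"]  # hypothetical work items
    pbar = ProgressBar(maxval=len(items), term_width=40,
                       widgets=[Percentage(), " ", Bar()])
    pbar.start()
    for i, item in enumerate(items):
        # ... load or process the item here ...
        pbar.update(i + 1)  # the subclass above also offers inc()
    pbar.finish()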