Example #1
import glob
import logging
import optparse
import os.path as osp

import bson

# masir_config and IngestManager are assumed to be provided by the
# surrounding MASIR package; their exact import paths are not shown in this
# snippet.


def main():
    """
    Ingest image files matching the given glob patterns, pairing each image
    with a same-named .bson metadata file when one exists.
    """
    usage = "Usage: %prog [options] GLOB1 [ GLOB2 ... ]"
    parser = optparse.OptionParser(usage)
    parser.add_option('-d', '--data-dir',
                      help="Non-standard directory to treat as the base data "
                           "directory.")
    opts, args = parser.parse_args()

    log = logging.getLogger("main")
    data_dir = opts.data_dir or masir_config.DIR_DATA

    source_files = []
    for g in args:
        source_files.extend(glob.glob(g))

    if not source_files:
        raise ValueError("No files found with the supplied globs.")

    im = IngestManager(data_dir)
    for f in source_files:
        # The globs may match BSON metadata files; skip them here (they are
        # picked up below as paired metadata).
        if osp.splitext(f)[1] == '.bson':
            continue

        try:
            # if there's a found paired BSON file, pass that too
            md_filepath = osp.splitext(f)[0] + ".bson"
            if not osp.exists(md_filepath):
                md_filepath = None

            im.ingest_image(f, md_filepath)
        except IOError:
            log.warning("Not an image file: %s", f)
            continue
        except bson.InvalidBSON as ex:
            log.warning("BSON error: %s", str(ex))
        except Exception as ex:
            log.warning("Other exception caught for file '%s':\n"
                        "    %s",
                        f, str(ex))
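
For context, a minimal sketch of how a script built around the main() above might be wired up and invoked. The module name ingest_images.py, the logging configuration, and the example paths are illustrative assumptions, not part of the original source.

import logging

if __name__ == '__main__':
    # Configure root logging before main() starts emitting messages.
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s - %(name)s - %(message)s')
    main()

# Example invocation (hypothetical paths):
#   python ingest_images.py -d /data/masir "incoming/*.jpg" "incoming/*.png"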
Example #2
    def __init__(self, base_work_dir, descriptor):
        """ Initialize IQR session

        :param base_work_dir: Base directory to put working files into
        :type base_work_dir: str
        :param descriptor: FeatureDescriptor to use for this IQR session
        :type descriptor: masir.search.FeatureDescriptor.FeatureDescriptor

        """
        self._session_lock = multiprocessing.RLock()

        self.uuid = uuid.uuid1()
        self.positive_ids = set()
        self.negative_ids = set()

        self.work_dir = osp.join(osp.abspath(osp.expanduser(base_work_dir)),
                                 'iqr', 'session-%s' % str(self.uuid))
        if not osp.isdir(self.work_dir):
            os.makedirs(self.work_dir)

        #: :type: masir.search.FeatureDescriptor.FeatureDescriptor
        self.descriptor = descriptor

        # noinspection PyTypeChecker
        self.feature_memory = FeatureMemory.construct_from_files(
            descriptor.ids_file, descriptor.bg_flags_file,
            descriptor.feature_data_file, descriptor.kernel_data_file
        )
        # noinspection PyProtectedMember
        self._original_fm_bgid_set = self.feature_memory._bg_clip_ids

        # Mapping of clip ID to the probability that it is associated with
        # the positive adjudications. This is None before any refinement
        # occurs.
        #: :type: None or dict of (int, float)
        self.results = None

        self.svm_train_params = '-q -t 4 -b 1 -w1 50 -c 100'

        # Ingest where extension images are placed
        extension_ingest_dir = osp.join(self.work_dir, "extension_ingest")
        self.extension_ingest = IngestManager(
            extension_ingest_dir,
            self.feature_memory.get_ids().max() + 1
        )
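
As a side note, the constructor's per-session working-directory pattern can be seen in isolation in the small standard-library-only sketch below; the base path is a placeholder.

import os
import os.path as osp
import uuid

base_work_dir = "~/masir_work"  # placeholder base path
session_uuid = uuid.uuid1()

# Expand and absolutize the base path, then nest a UUID-stamped session
# directory under an 'iqr' subdirectory, creating it on demand.
work_dir = osp.join(osp.abspath(osp.expanduser(base_work_dir)),
                    'iqr', 'session-%s' % session_uuid)
if not osp.isdir(work_dir):
    os.makedirs(work_dir)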
Example #3
class IqrSession (object):

    @property
    def _log(self):
        return logging.getLogger(
            '.'.join((self.__module__, self.__class__.__name__))
            + "[%s]" % self.uuid
        )

    def __init__(self, base_work_dir, descriptor):
        """ Initialize IQR session

        :param base_work_dir: Base directory to put working files into
        :type base_work_dir: str
        :param descriptor: FeatureDescriptor to use for this IQR session
        :type descriptor: masir.search.FeatureDescriptor.FeatureDescriptor

        """
        self._session_lock = multiprocessing.RLock()

        self.uuid = uuid.uuid1()
        self.positive_ids = set()
        self.negative_ids = set()

        self.work_dir = osp.join(osp.abspath(osp.expanduser(base_work_dir)),
                                 'iqr', 'session-%s' % str(self.uuid))
        if not osp.isdir(self.work_dir):
            os.makedirs(self.work_dir)

        #: :type: masir.search.FeatureDescriptor.FeatureDescriptor
        self.descriptor = descriptor

        # noinspection PyTypeChecker
        self.feature_memory = FeatureMemory.construct_from_files(
            descriptor.ids_file, descriptor.bg_flags_file,
            descriptor.feature_data_file, descriptor.kernel_data_file
        )
        # noinspection PyProtectedMember
        self._original_fm_bgid_set = self.feature_memory._bg_clip_ids

        # Mapping of clip ID to the probability that it is associated with
        # the positive adjudications. This is None before any refinement
        # occurs.
        #: :type: None or dict of (int, float)
        self.results = None

        self.svm_train_params = '-q -t 4 -b 1 -w1 50 -c 100'

        # Ingest where extension images are placed
        extension_ingest_dir = osp.join(self.work_dir, "extension_ingest")
        self.extension_ingest = IngestManager(
            extension_ingest_dir,
            self.feature_memory.get_ids().max() + 1
        )

    def __del__(self):
        # Clean up working directory
        shutil.rmtree(self.work_dir)

    @property
    def ordered_results(self):
        """
        Return a tuple of the current (id, probability) result pairs in
        descending order of probability score. If there are no results yet,
        None is returned.

        """
        with self._session_lock:
            if self.results:
                return tuple(sorted(self.results.iteritems(),
                                    key=lambda p: p[1],
                                    reverse=True))
            return None

    def extend_model(self, *image_files):
        """
        Extend our data models given the following image file paths.

        The given image files are added to this session's extension ingest.

        :raises ValueError: If an image file is already ingested.

        :param image_files: Iterable of image file paths
        :type image_files: Iterable of str

        """
        with self._session_lock:
            p_pool = multiprocessing.pool.Pool()

            args = []
            for img in image_files:
                uid, md5, fpath = self.extension_ingest.ingest_image(img)
                args.append((self._log.name, self.descriptor, uid, fpath))

            self._log.info("Feature generation...")
            img_features = \
                p_pool.map_async(_iqr_async_image_feature, args).get()

            p_pool.close()
            p_pool.join()

            self._log.info("Updating FM")
            new_ids = []
            for img_id, img, feat in img_features:
                self._log.info("=== %s", img)
                # TODO: Update this function in FeatureMemory to take multiple
                #       ID, feature pairs (or parallel arrays)
                self.feature_memory.update(img_id, feat)
                new_ids.append(img_id)

            # adding new IDs to positive adjudications set
            self.positive_ids.update(new_ids)

    def adjudicate(self, new_positives=(), new_negatives=(),
                   un_positives=(), un_negatives=()):
        """
        Update current state of user defined positive and negative truths on
        specific image IDs

        :param new_positives: New IDs of items to now be considered positive.
        :type new_positives: tuple of int
        :param new_negatives: New IDs of items to now be considered negative.
        :param un_positives: New item IDs that are now not positive any more.
        :type un_positives: tuple of int
        :param un_negatives: New item IDs that are now not negative any more.
        :type un_negatives: tuple of int

        """
        with self._session_lock:
            self.positive_ids.update(new_positives)
            self.positive_ids.difference_update(un_positives)
            self.positive_ids.difference_update(new_negatives)

            self.negative_ids.update(new_negatives)
            self.negative_ids.difference_update(un_negatives)
            self.negative_ids.difference_update(new_positives)

            # # EXPERIMENT
            # # When we have negative adjudications, remove use of the original
            # # bg IDs set in the feature memory, injecting this session's
            # # negative ID set (both use set objects, so just share the reference).
            # # When we don't have negative adjudications, reinstate the original
            # # set of bg IDs.
            # if self.negative_ids:
            #     self.feature_memory._bg_clip_ids = self.negative_ids
            # else:
            #     self.feature_memory._bg_clip_ids = self._original_fm_bgid_set

            # # Update background flags in our feature_memory
            # # - new positives and un-negatives are now non-background
            # # - new negatives are now background.
            # for uid in set(new_positives).union(un_negatives):
            #     self._log.info("Marking UID %d as non-background", uid)
            #     self.feature_memory.update(uid, is_background=False)
            #     assert uid not in self.feature_memory.get_bg_ids()
            # for uid in new_negatives:
            #     self._log.info("Marking UID %d as background", uid)
            #     self.feature_memory.update(uid, is_background=True)
            #     assert uid in self.feature_memory.get_bg_ids()

    def refine(self, new_positives=(), new_negatives=(),
               un_positives=(), un_negatives=()):
        """ Refine current model results based on current adjudication state

        :raises RuntimeError: There are no adjudications to run on. We must have
            at least one positive adjudication.

        :param new_positives: New IDs of items to now be considered positive.
        :type new_positives: tuple of int
        :param new_negatives: New IDs of items to now be considered negative.
        :param un_positives: New item IDs that are now not positive any more.
        :type un_positives: tuple of int
        :param un_negatives: New item IDs that are now not negative any more.
        :type un_negatives: tuple of int

        """
        with self._session_lock:
            self.adjudicate(new_positives, new_negatives, un_positives,
                            un_negatives)

            if not self.positive_ids:
                raise RuntimeError("Did not find at least one positive "
                                   "adjudication.")

            #
            # Model training
            #
            self._log.info("Starting model training...")
            self._log.debug("-- Positives: %s", self.positive_ids)
            self._log.debug("-- Negatives: %s", self.negative_ids)

            # query submatrix of distance kernel for positive and background
            # IDs.
            self._log.debug("Extracting symmetric submatrix")
            idx2id_map, idx_bg_flags, m = \
                self.feature_memory\
                    .get_distance_kernel()\
                    .symmetric_submatrix(*self.positive_ids)
            self._log.debug("-- num bg: %d", idx_bg_flags.count(True))
            self._log.debug("-- m shape: %s", m.shape)

            # The model training function needs the inverse of idx_bg_flags:
            # True indicates a positively adjudicated index.
            labels_train = numpy.array(tuple(not b for b in idx_bg_flags))

            # # Where to save working models
            # model_filepath = osp.join(self.work_dir,
            #                           "iqr_session.%s.model" % self.uuid)
            # svIDs_filepath = osp.join(self.work_dir,
            #                           "iqr_session.%s.svIDs" % self.uuid)

            # The returned dictionary contains the keys "model" and
            # "clipids_SVs", referring to the trained model and the list of
            # support vector clip IDs, respectively.
            ret_dict = iqr_model_train(m, labels_train, idx2id_map,
                                       self.svm_train_params)
            svm_model = ret_dict['model']
            svm_svIDs = ret_dict['clipids_SVs']

            #
            # Model Testing/Application
            #
            self._log.info("Starting model application...")

            # As we are only extracting rows, the full set of IDs is preserved
            # along the x-axis (column IDs). The list of IDs along the x-axis
            # is thus effectively the ordered list of all IDs.
            idx2id_row, idx2id_col, kernel_test = \
                self.feature_memory.get_distance_kernel()\
                                   .extract_rows(svm_svIDs)

            # Testing/Ranking call
            #   Passing the array version of the kernel sub-matrix. The
            #   returned output['probs'] type matches the type passed in
            #   here, and using an array makes syntax cleaner.
            self._log.debug("Ranking IDs")
            output = iqr_model_test(svm_model, kernel_test.A, idx2id_col)

            probability_map = dict(zip(output['clipids'], output['probs']))
            if self.results is None:
                self.results = IqrResultsDict()
            self.results.update(probability_map)

            # Force adjudicated negatives to probability 0.0 so they do not
            # pollute further adjudication views.
            for uid in self.negative_ids:
                self.results[uid] = 0.0

    def reset(self):
        """ Reset the IQR Search state

        Clears all adjudications and reloads the original feature data.

        """
        with self._session_lock:
            self.positive_ids.clear()
            self.negative_ids.clear()
            # noinspection PyUnresolvedReferences
            self.feature_memory = FeatureMemory.construct_from_files(
                self.descriptor.ids_file, self.descriptor.bg_flags_file,
                self.descriptor.feature_data_file,
                self.descriptor.kernel_data_file
            )
            self.results = None

            # clear contents of working directory
            shutil.rmtree(self.work_dir)
            os.makedirs(self.work_dir)
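
Finally, a hedged end-to-end sketch of the IqrSession lifecycle defined above (construct, extend, refine, adjudicate, reset). The descriptor factory, image paths, and example IDs are placeholders; only methods and attributes visible in the class are used, and the exact FeatureDescriptor construction is not shown in this source.

# get_feature_descriptor() is a hypothetical factory; a real
# masir.search.FeatureDescriptor.FeatureDescriptor supplies ids_file,
# bg_flags_file, feature_data_file and kernel_data_file.
descriptor = get_feature_descriptor()
session = IqrSession("~/masir_work", descriptor)

# Extend the model with user-supplied exemplar images; the new IDs are
# automatically added to the positive adjudication set.
session.extend_model("/path/to/query1.jpg", "/path/to/query2.jpg")

# Refine against the current adjudication state and inspect ranked results.
session.refine()
for uid, prob in session.ordered_results[:10]:
    print(uid, prob)

# Adjudicate some returned IDs and refine again (IDs are illustrative).
session.refine(new_positives=(42,), new_negatives=(7,))

# Clear all adjudications and restore the original feature data.
session.reset()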