Example #1
def negative_sample(pccs, per_pair=None):
    import utool as ut
    rng = ut.ensure_rng(2039141610, 'python')
    # Choose pairs of distinct PCCs; annotations drawn across such a pair
    # are guaranteed negatives (different names)
    neg_pcc_pairs = ut.random_combinations(pccs, 2, rng=rng)
    # util.roundrobin is a local helper that interleaves the per-pair
    # generators (the classic itertools round-robin recipe)
    yield from util.roundrobin(
        ut.random_product((cc1, cc2), num=per_pair, rng=rng)
        for cc1, cc2 in neg_pcc_pairs)
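A self-contained sketch of the same cross-PCC negative-sampling pattern may help; the local roundrobin helper and the toy pccs below are stand-ins for util.roundrobin and real wbia data, and the plain random module replaces utool's seeded rng:

import itertools as it
import random

def roundrobin(iterables):
    # Interleave items from each iterable in turn (itertools-recipe style),
    # taking a single iterable of iterables like util.roundrobin above
    iterators = [iter(x) for x in iterables]
    while iterators:
        still_active = []
        for itr in iterators:
            try:
                yield next(itr)
                still_active.append(itr)
            except StopIteration:
                pass
        iterators = still_active

def toy_negative_sample(pccs, per_pair, rng):
    # One generator of cross-PCC annotation pairs per PCC combination
    pcc_pairs = list(it.combinations(pccs, 2))
    yield from roundrobin(
        [(rng.choice(sorted(cc1)), rng.choice(sorted(cc2)))
         for _ in range(per_pair)]
        for cc1, cc2 in pcc_pairs)

rng = random.Random(0)
print(list(toy_negative_sample([{1, 2}, {3, 4}, {5}], per_pair=2, rng=rng)))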
Example #2
def testdata_smk(*args, **kwargs):
    """
    >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
    >>> kwargs = {}
    """
    import wbia
    import numpy as np
    import utool as ut
    import sklearn
    import sklearn.model_selection

    ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST')
    nid_list = np.array(ibs.annots(aid_list).nids)
    rng = ut.ensure_rng(0)
    # NOTE: recent sklearn raises if random_state is set while shuffle=False,
    # so shuffle explicitly with the seeded rng
    xvalkw = dict(n_splits=4, shuffle=True, random_state=rng)

    skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
    train_idx, test_idx = next(skf.split(aid_list, nid_list))
    daids = ut.take(aid_list, train_idx)
    qaids = ut.take(aid_list, test_idx)

    config = {
        'num_words': 1000,
    }
    config.update(**kwargs)
    qreq_ = SMKRequest(ibs, qaids, daids, config)
    smk = qreq_.smk
    # qreq_ = ibs.new_query_request(qaids, daids, cfgdict={'pipeline_root': 'smk', 'proot': 'smk'})
    # qreq_ = ibs.new_query_request(qaids, daids, cfgdict={})
    return ibs, smk, qreq_
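testdata_smk needs a wbia test database, but the train/query split it performs is plain sklearn; a minimal sketch with made-up integer ids (aid_list and nid_list below are stand-ins, not wbia data):

import numpy as np
import sklearn.model_selection

aid_list = np.arange(20)               # stand-in annotation ids
nid_list = np.repeat(np.arange(5), 4)  # four annotations per name
skf = sklearn.model_selection.StratifiedKFold(n_splits=4, shuffle=True,
                                              random_state=0)
train_idx, test_idx = next(skf.split(aid_list, nid_list))
daids = aid_list[train_idx]            # database annotations
qaids = aid_list[test_idx]             # query annotations
print(len(daids), len(qaids))          # 15 5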
Example #3
    def __init__(self, pblm, pccs, dim=224):
        super(RandomBalancedIBEISSample, self).__init__(dim=dim)
        import utool as ut
        # Request square chips resized to the network input dimension
        chip_config = {'resize_dim': 'wh', 'dim_size': (self.dim, self.dim)}
        self.pccs = pccs
        all_aids = ut.flatten(pccs)
        all_fpaths = pblm.infr.ibs.depc_annot.get('chips',
                                                  all_aids,
                                                  read_extern=False,
                                                  colnames='img',
                                                  config=chip_config)

        self.aid_to_fpath = dict(zip(all_aids, all_fpaths))

        # self.multitons_pccs = [pcc for pcc in pccs if len(pcc) > 1]
        self.pos_pairs = []

        # Sample all possible positive combinations and ignore incomparable ones
        self.infr = pblm.infr
        # TODO: each sample should really get a weight depending on the
        # number of aids in its pcc
        for pcc in pccs:
            if len(pcc) >= 2:
                # All within-PCC edges are candidate positive pairs
                edges = np.array(
                    list(it.starmap(self.infr.e_, it.combinations(pcc, 2))))
                is_comparable = self.is_comparable(edges)
                pos_edges = edges[is_comparable]
                self.pos_pairs.extend(list(pos_edges))
        rng = ut.ensure_rng(563401, 'numpy')
        self.pyrng = ut.ensure_rng(564043, 'python')
        self.rng = rng

        if True:
            # Hack: derive a deterministic input id from the pcc contents
            depends = [
                sorted(map(sorted, self.pccs)),
            ]
            hashid = hashutil.hash_data(depends)[:8]
            self.input_id = '{}-{}'.format(len(self), hashid)
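A toy version of the within-PCC pair enumeration above, using only itertools and skipping the comparability filter (the pccs are made up):

import itertools as it

pccs = [{1, 2, 3}, {4, 5}, {6}]
pos_pairs = [e for pcc in pccs if len(pcc) >= 2
             for e in it.combinations(sorted(pcc), 2)]
print(pos_pairs)  # [(1, 2), (1, 3), (2, 3), (4, 5)]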
Example #4
    def __init__(oracle, accuracy, rng):
        # A string seed is reduced to an int before seeding the Python RNG
        if isinstance(rng, six.string_types):
            rng = sum(map(ord, rng))
        rng = ut.ensure_rng(rng, impl='python')

        # Accuracy may be a (normal, recover) pair or a single shared value
        if isinstance(accuracy, tuple):
            oracle.normal_accuracy = accuracy[0]
            oracle.recover_accuracy = accuracy[1]
        else:
            oracle.normal_accuracy = accuracy
            oracle.recover_accuracy = accuracy

        oracle.rng = rng
        oracle.states = {POSTV, NEGTV, INCMP}
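The seed handling above accepts either an int or a string; a minimal sketch of the same logic, with random.Random standing in for ut.ensure_rng(..., impl='python'):

import random

seed = 'oracle'                 # hypothetical string seed
if isinstance(seed, str):
    seed = sum(map(ord, seed))  # reduce the string to an int, as above
rng = random.Random(seed)
print(seed, rng.random())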
Example #5
    def __init__(self, img1_fpaths, img2_fpaths, labels, dim=224):
        super(LabeledPairDataset, self).__init__(dim=dim)
        assert len(img1_fpaths) == len(img2_fpaths)
        assert len(labels) == len(img2_fpaths)
        self.img1_fpaths = list(img1_fpaths)
        self.img2_fpaths = list(img2_fpaths)
        self.labels = list(labels)

        # Hack for input id
        if True:
            depends = [self.img1_fpaths, self.img2_fpaths, self.labels]
            hashid = hashutil.hash_data(depends)[:8]
            self.input_id = '{}-{}'.format(len(self), hashid)

        import utool as ut
        rng = ut.ensure_rng(3432, 'numpy')
        self.rng = rng
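The input-id hack only needs a stable content hash; a sketch using hashlib in place of utool's hash_data (the file paths and labels are hypothetical):

import hashlib

depends = [['a.png', 'b.png'], ['c.png', 'd.png'], [1, 0]]
hashid = hashlib.sha1(repr(depends).encode()).hexdigest()[:8]
input_id = '{}-{}'.format(len(depends[0]), hashid)  # num pairs + content hash
print(input_id)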
Example #6
def stratified_label_shuffle_split(y, labels, fractions, y_idx=None, rng=None):
    """
    Modified from sklearn to make n splits instead of 2.
    Also enforces that labels are not broken into separate groups.

    Args:
        y (ndarray): class values (one per item)
        labels (ndarray): group labels; items sharing a label are kept in the
            same split
        fractions (list): relative size of each split
        y_idx (array): indexes associated with y if it was already presampled
        rng (RandomState): random number generator (default = None)

    Returns:
        list: index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_label_shuffle_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> fractions = [.7, .3]
        >>> rng = np.random.RandomState(0)
        >>> index_sets = stratified_label_shuffle_split(y, labels, fractions, rng=rng)
    """
    rng = ut.ensure_rng(rng)
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class by sampling one of its y values
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group

    unique_idxs = stratified_shuffle_split(unique_ys, fractions, rng)
    index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs))) for idxs in unique_idxs]
    if y_idx is not None:
        # These indices subindex into the parent set of indices
        index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
    return index_sets
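The core idea (group items by label, split at the group level, then flatten back to item indices) can be sketched with numpy alone; toy labels and a single 70/30 split:

import numpy as np

labels = np.array([0, 0, 1, 1, 2, 2, 3, 3])
rng = np.random.RandomState(0)
uniq = np.unique(labels)
rng.shuffle(uniq)                      # shuffle whole groups, not items
cut = int(len(uniq) * 0.7)
train = np.flatnonzero(np.isin(labels, uniq[:cut]))
test = np.flatnonzero(np.isin(labels, uniq[cut:]))
print(train, test)                     # no label straddles the split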
Example #7
def stratified_kfold_label_split(y, labels, n_folds=2, y_idx=None, rng=None):
    """
    Stratified k-fold cross validation that also enforces that labels are not
    broken into separate groups.

    Args:
        y (ndarray): class values (one per item)
        labels (ndarray): group labels; items sharing a label are kept in the
            same fold
        n_folds (int): number of folds (default = 2)
        y_idx (array): indexes associated with y if it was already presampled
        rng (RandomState): random number generator (default = None)

    Returns:
        list: folded_index_sets

    CommandLine:
        python -m ibeis_cnn.dataset stratified_kfold_label_split --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis_cnn.dataset import *  # NOQA
        >>> y      = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> labels = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7]
        >>> rng = np.random.RandomState(0)
        >>> folded_index_sets = stratified_kfold_label_split(y, labels, n_folds=2, rng=rng)
    """

    rng = ut.ensure_rng(rng)
    unique_labels, groupxs = ut.group_indices(labels)
    grouped_ys = ut.apply_grouping(y, groupxs)
    # Assign each group a probabilistic class by sampling one of its y values
    unique_ys = [ys[rng.randint(0, len(ys))] for ys in grouped_ys]
    # TODO: should weight the following selection based on size of group

    # sklearn.cross_validation was removed in sklearn 0.20; use
    # sklearn.model_selection instead
    import sklearn.model_selection
    xvalkw = dict(n_splits=n_folds, shuffle=True, random_state=rng)
    skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
    _iter = skf.split(X=np.empty((len(unique_ys), 1)), y=unique_ys)

    folded_index_sets = []

    for label_idx_set in _iter:
        index_sets = [np.array(ut.flatten(ut.take(groupxs, idxs)))
                      for idxs in label_idx_set]
        folded_index_sets.append(index_sets)

    # Sanity check: no label may appear in both the train and test sets
    for train_idx, test_idx in folded_index_sets:
        train_labels = set(ut.take(labels, train_idx))
        test_labels = set(ut.take(labels, test_idx))
        assert len(test_labels.intersection(train_labels)) == 0, (
            'same labels appeared in both train and test')

    if y_idx is not None:
        # These indices subindex into the parent set of indices
        folded_index_sets2 = []
        for index_sets in folded_index_sets:
            index_sets = [np.take(y_idx, idxs, axis=0) for idxs in index_sets]
            folded_index_sets2.append(index_sets)
        folded_index_sets = folded_index_sets2
    return folded_index_sets
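Modern sklearn provides GroupKFold (and, since version 1.0, StratifiedGroupKFold) which enforce the same no-label-overlap constraint directly; a quick check with the doctest data (note GroupKFold does not stratify by y):

import numpy as np
import sklearn.model_selection

y = np.array([0] * 10 + [1] * 10)
labels = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 0, 7, 7, 7, 7])
gkf = sklearn.model_selection.GroupKFold(n_splits=2)
for train_idx, test_idx in gkf.split(np.zeros(len(y)), y, groups=labels):
    # no label may appear on both sides of the split
    assert not set(labels[train_idx]) & set(labels[test_idx])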
Example #8
def positive_sample(pccs, per_cc=None):
    import utool as ut
    rng = ut.ensure_rng(2039141610, 'python')
    # Sample within-PCC annotation pairs (guaranteed positives);
    # util.roundrobin is a local helper that interleaves the per-cc generators
    yield from util.roundrobin(
        ut.random_combinations(cc, size=2, num=per_cc, rng=rng) for cc in pccs)
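A toy run of the same within-PCC sampling with the standard library; random.sample stands in for ut.random_combinations and the pccs are made up:

import itertools as it
import random

rng = random.Random(0)
pccs = [{1, 2, 3}, {4, 5}]
for cc in pccs:
    combos = list(it.combinations(sorted(cc), 2))
    print(rng.sample(combos, min(2, len(combos))))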
Example #9
    def _cm_training_pairs(
        infr,
        qreq_=None,
        cm_list=None,
        top_gt=2,
        mid_gt=2,
        bot_gt=2,
        top_gf=2,
        mid_gf=2,
        bot_gf=2,
        rand_gt=2,
        rand_gf=2,
        rng=None,
    ):
        """
        Constructs training data for a pairwise classifier

        CommandLine:
            python -m wbia.algo.graph.core _cm_training_pairs

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> infr = testdata_infr('PZ_MTEST')
            >>> infr.exec_matching(cfgdict={
            >>>     'can_match_samename': True,
            >>>     'K': 4,
            >>>     'Knorm': 1,
            >>>     'prescore_method': 'csum',
            >>>     'score_method': 'csum'
            >>> })
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> exec(ut.execstr_funckw(infr._cm_training_pairs))
            >>> rng = np.random.RandomState(42)
            >>> aid_pairs = np.array(infr._cm_training_pairs(rng=rng))
            >>> print(len(aid_pairs))
            >>> assert np.sum(aid_pairs.T[0] == aid_pairs.T[1]) == 0
        """
        if qreq_ is None:
            cm_list = infr.cm_list
            qreq_ = infr.qreq_
        ibs = infr.ibs
        aid_pairs = []
        dnids = qreq_.get_qreq_annot_nids(qreq_.daids)
        rng = ut.ensure_rng(rng)
        for cm in ut.ProgIter(cm_list, lbl='building pairs'):
            all_gt_aids = cm.get_top_gt_aids(ibs)
            all_gf_aids = cm.get_top_gf_aids(ibs)
            gt_aids = ut.take_percentile_parts(all_gt_aids, top_gt, mid_gt, bot_gt)
            gf_aids = ut.take_percentile_parts(all_gf_aids, top_gf, mid_gf, bot_gf)
            # get unscored examples
            unscored_gt_aids = [
                aid for aid in qreq_.daids[cm.qnid == dnids] if aid not in cm.daid2_idx
            ]
            rand_gt_aids = ut.random_sample(unscored_gt_aids, rand_gt, rng=rng)
            # groundfalse daids: database annots whose name differs from the query
            _gf_aids = qreq_.daids.compress(cm.qnid != dnids)
            rand_gf_aids = ut.random_sample(_gf_aids, rand_gf, rng=rng).tolist()
            chosen_daids = ut.unique(gt_aids + gf_aids + rand_gf_aids + rand_gt_aids)
            aid_pairs.extend([(cm.qaid, aid) for aid in chosen_daids if cm.qaid != aid])

        return aid_pairs
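The ut.take_percentile_parts calls above pick a few ranked matches from the head, middle, and tail of each candidate list; a hypothetical stand-in illustrating the idea (not utool's exact implementation):

def take_parts(ranked, top, mid, bot):
    # a few items from the head, middle, and tail of a ranked list
    n = len(ranked)
    start = max(0, n // 2 - mid // 2)
    parts = ranked[:top] + ranked[start:start + mid] + ranked[n - bot:]
    return list(dict.fromkeys(parts))  # dedupe while keeping order

print(take_parts(list(range(10)), top=2, mid=2, bot=2))  # [0, 1, 4, 5, 8, 9]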