Esempio n. 1
0
def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """ Gets database annot_rowids based on command line arguments

    DEPRICATE

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController):  ibeis controller object
        default_daids (str): (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids (default = None)

    Returns:
        list: available_daids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST  --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')

    available_daids = []

    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis.other import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
        #available_qaids = valid_aids[0:1]
        assert not isinstance(available_daids, six.string_types)
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_ordered(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')
    if DSHUFFLE:
        # Determenistic shuffling
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        dindexes = ensure_flatlistlike(DINDEX)
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
Esempio n. 2
0
def test_siamese_performance(model, data, labels, flat_metadata, dataname=''):
    r"""
    CommandLine:
        utprof.py -m ibeis_cnn --tf pz_patchmatch --db liberty --test --weights=liberty:current --arch=siaml2_128 --test
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test  --ensure
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --test  --ensure --weights=new
        python -m ibeis_cnn --tf netrun --db liberty --arch=siaml2_128 --train --weights=new
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128 --test  # NOQA
        python -m ibeis_cnn --tf netrun --db pzmtest --weights=liberty:current --arch=siaml2_128
    """
    import vtool as vt
    import plottool as pt

    # TODO: save in model.trainind_dpath/diagnostics/figures
    ut.colorprint('\n[siam_perf] Testing Siamese Performance', 'white')
    #epoch_dpath = model.get_epoch_diagnostic_dpath()
    epoch_dpath = model.arch_dpath
    ut.vd(epoch_dpath)

    dataname += ' ' + model.get_history_hashid() + '\n'

    history_text = ut.list_str(model.era_history, newlines=True)

    ut.write_to(ut.unixjoin(epoch_dpath, 'era_history.txt'), history_text)

    #if True:
    #    import matplotlib as mpl
    #    mpl.rcParams['agg.path.chunksize'] = 100000

    #data   = data[::50]
    #labels = labels[::50]
    #from ibeis_cnn import utils
    #data, labels = utils.random_xy_sample(data, labels, 10000, model.data_per_label_input)

    FULL = not ut.get_argflag('--quick')

    fnum_gen = pt.make_fnum_nextgen()

    ut.colorprint('[siam_perf] Show era history', 'white')
    fig = model.show_era_loss(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)

    # hack
    ut.colorprint('[siam_perf] Show weights image', 'white')
    fig = model.show_weights_image(fnum=fnum_gen())
    pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
    #model.draw_all_conv_layer_weights(fnum=fnum_gen())
    #model.imwrite_weights(1)
    #model.imwrite_weights(2)

    # Compute each type of score
    ut.colorprint('[siam_perf] Building Scores', 'white')
    test_outputs = model.predict2(model, data)
    network_output = test_outputs['network_output_determ']
    # hack converting network output to distances for non-descriptor networks
    if len(network_output.shape) == 2 and network_output.shape[1] == 1:
        cnn_scores = network_output.T[0]
    elif len(network_output.shape) == 1:
        cnn_scores = network_output
    elif len(network_output.shape) == 2 and network_output.shape[1] > 1:
        assert model.data_per_label_output == 2
        vecs1 = network_output[0::2]
        vecs2 = network_output[1::2]
        cnn_scores = vt.L2(vecs1, vecs2)
    else:
        assert False
    cnn_scores = cnn_scores.astype(np.float64)

    # Segfaults with the data passed in is large (AND MEMMAPPED apparently)
    # Fixed in hesaff implementation
    SIFT = FULL
    if SIFT:
        sift_scores, sift_list = test_sift_patchmatch_scores(data, labels)
        sift_scores = sift_scores.astype(np.float64)

    ut.colorprint('[siam_perf] Learning Encoders', 'white')
    # Learn encoders
    encoder_kw = {
        #'monotonize': False,
        'monotonize': True,
    }
    cnn_encoder = vt.ScoreNormalizer(**encoder_kw)
    cnn_encoder.fit(cnn_scores, labels)

    if SIFT:
        sift_encoder = vt.ScoreNormalizer(**encoder_kw)
        sift_encoder.fit(sift_scores, labels)

    # Visualize
    ut.colorprint('[siam_perf] Visualize Encoders', 'white')
    viz_kw = dict(
        with_scores=False,
        with_postbayes=False,
        with_prebayes=False,
        target_tpr=.95,
    )
    inter_cnn = cnn_encoder.visualize(
        figtitle=dataname + ' CNN scores. #data=' + str(len(data)),
        fnum=fnum_gen(), **viz_kw)
    if SIFT:
        inter_sift = sift_encoder.visualize(
            figtitle=dataname + ' SIFT scores. #data=' + str(len(data)),
            fnum=fnum_gen(), **viz_kw)

    # Save
    pt.save_figure(fig=inter_cnn.fig, dpath=epoch_dpath)
    if SIFT:
        pt.save_figure(fig=inter_sift.fig, dpath=epoch_dpath)

    # Save out examples of hard errors
    #cnn_fp_label_indicies, cnn_fn_label_indicies =
    #cnn_encoder.get_error_indicies(cnn_scores, labels)
    #sift_fp_label_indicies, sift_fn_label_indicies =
    #sift_encoder.get_error_indicies(sift_scores, labels)

    with_patch_examples = FULL
    if with_patch_examples:
        ut.colorprint('[siam_perf] Visualize Confusion Examples', 'white')
        cnn_indicies = cnn_encoder.get_confusion_indicies(cnn_scores, labels)
        if SIFT:
            sift_indicies = sift_encoder.get_confusion_indicies(sift_scores, labels)

        warped_patch1_list, warped_patch2_list = list(zip(*ut.ichunks(data, 2)))
        samp_args = (warped_patch1_list, warped_patch2_list, labels)
        _sample = functools.partial(draw_results.get_patch_sample_img, *samp_args)

        cnn_fp_img = _sample({'fs': cnn_scores}, cnn_indicies.fp)[0]
        cnn_fn_img = _sample({'fs': cnn_scores}, cnn_indicies.fn)[0]
        cnn_tp_img = _sample({'fs': cnn_scores}, cnn_indicies.tp)[0]
        cnn_tn_img = _sample({'fs': cnn_scores}, cnn_indicies.tn)[0]

        if SIFT:
            sift_fp_img = _sample({'fs': sift_scores}, sift_indicies.fp)[0]
            sift_fn_img = _sample({'fs': sift_scores}, sift_indicies.fn)[0]
            sift_tp_img = _sample({'fs': sift_scores}, sift_indicies.tp)[0]
            sift_tn_img = _sample({'fs': sift_scores}, sift_indicies.tn)[0]

        #if ut.show_was_requested():
        #def rectify(arr):
        #    return np.flipud(arr)
        SINGLE_FIG = False
        if SINGLE_FIG:
            def dump_img(img_, lbl, fnum):
                fig, ax = pt.imshow(img_, figtitle=dataname + ' ' + lbl, fnum=fnum)
                pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180)
            dump_img(cnn_fp_img, 'cnn_fp_img', fnum_gen())
            dump_img(cnn_fn_img, 'cnn_fn_img', fnum_gen())
            dump_img(cnn_tp_img, 'cnn_tp_img', fnum_gen())
            dump_img(cnn_tn_img, 'cnn_tn_img', fnum_gen())

            dump_img(sift_fp_img, 'sift_fp_img', fnum_gen())
            dump_img(sift_fn_img, 'sift_fn_img', fnum_gen())
            dump_img(sift_tp_img, 'sift_tp_img', fnum_gen())
            dump_img(sift_tn_img, 'sift_tn_img', fnum_gen())
            #vt.imwrite(dataname + '_' + 'cnn_fp_img.png', (cnn_fp_img))
            #vt.imwrite(dataname + '_' + 'cnn_fn_img.png', (cnn_fn_img))
            #vt.imwrite(dataname + '_' + 'sift_fp_img.png', (sift_fp_img))
            #vt.imwrite(dataname + '_' + 'sift_fn_img.png', (sift_fn_img))
        else:
            print('Drawing TP FP TN FN')
            fnum = fnum_gen()
            pnum_gen = pt.make_pnum_nextgen(4, 2)
            fig = pt.figure(fnum)
            pt.imshow(cnn_fp_img,  title='CNN FP',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fp_img, title='SIFT FP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_fn_img,  title='CNN FN',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_fn_img, title='SIFT FN', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tp_img,  title='CNN TP',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tp_img, title='SIFT TP', fnum=fnum, pnum=pnum_gen())
            pt.imshow(cnn_tn_img,  title='CNN TN',  fnum=fnum, pnum=pnum_gen())
            pt.imshow(sift_tn_img, title='SIFT TN', fnum=fnum, pnum=pnum_gen())
            pt.set_figtitle(dataname + ' confusions')
            pt.adjust_subplots(left=0, right=1.0, bottom=0., wspace=.01, hspace=.05)
            pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))

    with_patch_desc = FULL
    if with_patch_desc:
        ut.colorprint('[siam_perf] Visualize Patch Descriptors', 'white')
        fnum = fnum_gen()
        fig = pt.figure(fnum=fnum, pnum=(1, 1, 1))
        num_rows = 7
        pnum_gen = pt.make_pnum_nextgen(num_rows, 3)
        # Compare actual output descriptors
        for index in ut.random_indexes(len(sift_list), num_rows):
            vec_sift = sift_list[index]
            vec_cnn = network_output[index]
            patch = data[index]
            pt.imshow(patch, fnum=fnum, pnum=pnum_gen())
            pt.plot_descriptor_signature(vec_cnn, 'cnn vec',  fnum=fnum, pnum=pnum_gen())
            pt.plot_sift_signature(vec_sift, 'sift vec',  fnum=fnum, pnum=pnum_gen())
        pt.set_figtitle('Patch Descriptors')
        pt.adjust_subplots(left=0, right=0.95, bottom=0., wspace=.1, hspace=.15)
        pt.save_figure(fig=fig, dpath=epoch_dpath, dpi=180, figsize=(9, 18))
Esempio n. 3
0
def get_test_qaids(ibs, default_qaids=None, return_annot_info=False, aidcfg=None):
    """
    Gets test annot_rowids based on command line arguments

    DEPRICATE

    Args:
        ibs (IBEISController):  ibeis controller object
        default_qaids (None): if list then used only if no other aids are available (default = [1])
           as a string it mimics the command line

    Returns:
        list: available_qaids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_qaids
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qaid 1
        python -m ibeis.init.main_helpers --test-get_test_qaids --allgt --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --qaid 4 5 8  --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qindex 0:10 --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_qaids --controlled --db PZ_Master0 --exec-mode
        python -m ibeis.init.main_helpers --exec-get_test_qaids --db testdb1 --allgt --qindex 0:256

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_qaids = None
        >>> available_qaids = get_test_qaids(ibs, default_qaids)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_qaids, with_contrib=False, short=True)
        >>> result = 'available_qaids = ' + ut.obj_str(available_qaids, truncate=True, nl=False)
        >>> print('len(available_qaids) = %d' % len(available_qaids))
        >>> print(result)
        available_qaids = [1]
    """
    qaid_request_info = {}
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')

    # Old version of this function
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')
        print('[get_test_qaids] * default_qaids = %s' % (ut.obj_str(default_qaids, truncate=True, nl=False)))

    valid_aids = ibs.get_valid_aids()

    if len(valid_aids) == 0:
        print('[get_test_qaids] WARNING no annotations available')

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * include step')

    available_qaids = []

    #ut.get_argflag(('--all-cases', '--all'))
    #ut.get_argflag(('--all-gt-cases', '--allgt'))
    #ut.get_argflag(('--all-hard-cases', '--allhard'))
    #ut.get_argflag(('--qaid', '--qaids'))
    #ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    #not ut.get_argflag('--junk')

    ALL_CASES = params.args.all_cases or default_qaids == 'all'
    GT_CASES = params.args.all_gt_cases or default_qaids == 'gt'
    HARD_CASES = params.args.all_hard_cases or ut.get_argflag(('--all-hard-cases', '--allhard', '--hard'))
    NO_JUNK = not ut.get_argflag('--junk')
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    NO_REVIEWED = ut.get_argflag('--unreviewed')
    species = ut.get_argval('--species')
    #QAID = params.args.qaid
    QAID = ut.get_argval('--qaid', type_='fuzzy_subset', default=None)
    QINDEX = params.args.qindex
    QSHUFFLE = ut.get_argval('--qshuffle')

    if QAID is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including cmdline specified qaids')
        try:
            args_qaid = ensure_flatlistlike(QAID)
        except Exception:
            args_qaid = QAID
        available_qaids.extend(args_qaid)
        qaid_request_info['custom_commandline'] = args_qaid

    if ALL_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including all qaids')
        available_qaids.extend(valid_aids)
        qaid_request_info['all_cases'] = True

    if HARD_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including hard qaids')
        is_hard_list = ibs.get_annot_is_hard(valid_aids)
        hard_aids = ut.compress(valid_aids, is_hard_list)
        available_qaids.extend(hard_aids)
        qaid_request_info['hard_cases'] = True

    if GT_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including groundtruth qaids')
        has_gt_list = ibs.get_annot_has_groundtruth(valid_aids)
        hasgt_aids = ut.compress(valid_aids, has_gt_list)
        print('[get_test_qaids] Adding all %d/%d ground-truthed test cases' % (len(hasgt_aids), len(valid_aids)))
        available_qaids.extend(hasgt_aids)
        qaid_request_info['gt_cases'] = True

    if CONTROLLED_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including controlled qaids')
        from ibeis.other import ibsfuncs
        # Override all other gts with controlled
        controlled_qaids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=True)
        available_qaids.extend(controlled_qaids)
        qaid_request_info['controlled'] = True
    else:
        qaid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS QUERY
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))

    if len(available_qaids) == 0:
        print('[get_test_qaids] * ... defaulting, no available qaids on command line.')
        if default_qaids is None:
            default_qaids = valid_aids[0:1]
            qaid_request_info['default_one'] = True
        elif isinstance(default_qaids, six.string_types):
            if default_qaids == 'gt' or default_qaids == 'allgt':
                default_qaids = ibs.get_valid_aids(hasgt=True)
                qaid_request_info['default_gt'] = True
        available_qaids = default_qaids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * ... not defaulting')

    available_qaids = ut.unique_ordered(available_qaids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * exclude step')

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering junk')
        available_qaids = ibs.filter_junk_annotations(available_qaids)
        qaid_request_info['has_junk'] = False

    if NO_REVIEWED:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering unreviewed')
        isreviewed_list = ibs.get_annot_has_reviewed_matching_aids(available_qaids)
        available_qaids = ut.filterfalse_items(available_qaids, isreviewed_list)
        qaid_request_info['has_unreviewed'] = False

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_qaids)
            species = ibs.get_primary_database_species()
            qaid_request_info['primary_species'] = True

        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering to species=%r' % (species,))
        isvalid_list = np.array(ibs.get_annot_species(available_qaids)) == species
        available_qaids = ut.compress(available_qaids, isvalid_list)
        qaid_request_info['species_filter'] = species

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * subindex step')

    # ---- INDEX SUBSET

    #ut.get_argval('--qshuffle')
    if QSHUFFLE:
        # Determenistic shuffling
        available_qaids = ut.take(available_qaids, ut.random_indexes(len(available_qaids), seed=42))
        qaid_request_info['shuffled'] = True

    # Sample a large pool of chosen query qindexes
    if QINDEX is not None:
        # FIXME: should use a slice of the list or a sublist
        qindexes = ensure_flatlistlike(QINDEX)
        _test_qaids = [available_qaids[qx] for qx in qindexes if qx < len(available_qaids)]
        print('[get_test_qaids] Chose subset of size %d/%d' % (len(_test_qaids), len(available_qaids)))
        available_qaids = _test_qaids
        qaid_request_info['subset'] = qindexes

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] L ___ GET_TEST_QAIDS ___')
    if return_annot_info:
        return available_qaids, qaid_request_info
    else:
        return available_qaids
Esempio n. 4
0
def split_analysis(ibs):
    """
    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)) )
    print('%d annots on day 2' % (len(aids2)) )
    print('%d annots overall' % (len(all_annots)) )
    print('%d names overall' % (len(ut.unique(all_annots.nids))) )

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),))
    print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrival Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
Esempio n. 5
0
def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """ Gets database annot_rowids based on command line arguments

    DEPRICATE

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController):  ibeis controller object
        default_daids (str): (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids (default = None)

    Returns:
        list: available_daids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST  --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')

    available_daids = []

    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
        #available_qaids = valid_aids[0:1]
        assert not isinstance(available_daids, six.string_types)
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_keep_order(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')
    if DSHUFFLE:
        # Determenistic shuffling
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        dindexes = ensure_flatlistlike(DINDEX)
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
Esempio n. 6
0
def get_test_qaids(ibs, default_qaids=None, return_annot_info=False, aidcfg=None):
    """
    Gets test annot_rowids based on command line arguments

    DEPRICATE

    Args:
        ibs (IBEISController):  ibeis controller object
        default_qaids (None): if list then used only if no other aids are available (default = [1])
           as a string it mimics the command line

    Returns:
        list: available_qaids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_qaids
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qaid 1
        python -m ibeis.init.main_helpers --test-get_test_qaids --allgt --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --qaid 4 5 8  --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2 --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_MTEST --qaid 2
        python -m ibeis.init.main_helpers --test-get_test_qaids --controlled --db PZ_Master0 --qindex 0:10 --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_qaids --controlled --db PZ_Master0 --exec-mode
        python -m ibeis.init.main_helpers --exec-get_test_qaids --db testdb1 --allgt --qindex 0:256

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_qaids = None
        >>> available_qaids = get_test_qaids(ibs, default_qaids)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_qaids, with_contrib=False, short=True)
        >>> result = 'available_qaids = ' + ut.obj_str(available_qaids, truncate=True, nl=False)
        >>> print('len(available_qaids) = %d' % len(available_qaids))
        >>> print(result)
        available_qaids = [1]
    """
    qaid_request_info = {}
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')

    # Old version of this function
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] + --- GET_TEST_QAIDS ---')
        print('[get_test_qaids] * default_qaids = %s' % (ut.obj_str(default_qaids, truncate=True, nl=False)))

    valid_aids = ibs.get_valid_aids()

    if len(valid_aids) == 0:
        print('[get_test_qaids] WARNING no annotations available')

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * include step')

    available_qaids = []

    #ut.get_argflag(('--all-cases', '--all'))
    #ut.get_argflag(('--all-gt-cases', '--allgt'))
    #ut.get_argflag(('--all-hard-cases', '--allhard'))
    #ut.get_argflag(('--qaid', '--qaids'))
    #ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    #not ut.get_argflag('--junk')

    ALL_CASES = params.args.all_cases or default_qaids == 'all'
    GT_CASES = params.args.all_gt_cases or default_qaids == 'gt'
    HARD_CASES = params.args.all_hard_cases or ut.get_argflag(('--all-hard-cases', '--allhard', '--hard'))
    NO_JUNK = not ut.get_argflag('--junk')
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_qaids')
    NO_REVIEWED = ut.get_argflag('--unreviewed')
    species = ut.get_argval('--species')
    #QAID = params.args.qaid
    QAID = ut.get_argval('--qaid', type_='fuzzy_subset', default=None)
    QINDEX = params.args.qindex
    QSHUFFLE = ut.get_argval('--qshuffle')

    if QAID is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including cmdline specified qaids')
        try:
            args_qaid = ensure_flatlistlike(QAID)
        except Exception:
            args_qaid = QAID
        available_qaids.extend(args_qaid)
        qaid_request_info['custom_commandline'] = args_qaid

    if ALL_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including all qaids')
        available_qaids.extend(valid_aids)
        qaid_request_info['all_cases'] = True

    if HARD_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including hard qaids')
        is_hard_list = ibs.get_annot_is_hard(valid_aids)
        hard_aids = ut.compress(valid_aids, is_hard_list)
        available_qaids.extend(hard_aids)
        qaid_request_info['hard_cases'] = True

    if GT_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including groundtruth qaids')
        has_gt_list = ibs.get_annot_has_groundtruth(valid_aids)
        hasgt_aids = ut.compress(valid_aids, has_gt_list)
        print('[get_test_qaids] Adding all %d/%d ground-truthed test cases' % (len(hasgt_aids), len(valid_aids)))
        available_qaids.extend(hasgt_aids)
        qaid_request_info['gt_cases'] = True

    if CONTROLLED_CASES:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Including controlled qaids')
        from ibeis import ibsfuncs
        # Override all other gts with controlled
        controlled_qaids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=True)
        available_qaids.extend(controlled_qaids)
        qaid_request_info['controlled'] = True
    else:
        qaid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS QUERY
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))

    if len(available_qaids) == 0:
        print('[get_test_qaids] * ... defaulting, no available qaids on command line.')
        if default_qaids is None:
            default_qaids = valid_aids[0:1]
            qaid_request_info['default_one'] = True
        elif isinstance(default_qaids, six.string_types):
            if default_qaids == 'gt' or default_qaids == 'allgt':
                default_qaids = ibs.get_valid_aids(hasgt=True)
                qaid_request_info['default_gt'] = True
        available_qaids = default_qaids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * ... not defaulting')

    available_qaids = ut.unique_keep_order(available_qaids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * exclude step')

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering junk')
        available_qaids = ibs.filter_junk_annotations(available_qaids)
        qaid_request_info['has_junk'] = False

    if NO_REVIEWED:
        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering unreviewed')
        isreviewed_list = ibs.get_annot_has_reviewed_matching_aids(available_qaids)
        available_qaids = ut.filterfalse_items(available_qaids, isreviewed_list)
        qaid_request_info['has_unreviewed'] = False

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_qaids)
            species = ibs.get_primary_database_species()
            qaid_request_info['primary_species'] = True

        if VERB_MAIN_HELPERS:
            print('[get_test_qaids] * Filtering to species=%r' % (species,))
        isvalid_list = np.array(ibs.get_annot_species(available_qaids)) == species
        available_qaids = ut.compress(available_qaids, isvalid_list)
        qaid_request_info['species_filter'] = species

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] * subindex step')

    # ---- INDEX SUBSET

    #ut.get_argval('--qshuffle')
    if QSHUFFLE:
        # Determenistic shuffling
        available_qaids = ut.take(available_qaids, ut.random_indexes(len(available_qaids), seed=42))
        qaid_request_info['shuffled'] = True

    # Sample a large pool of chosen query qindexes
    if QINDEX is not None:
        # FIXME: should use a slice of the list or a sublist
        qindexes = ensure_flatlistlike(QINDEX)
        _test_qaids = [available_qaids[qx] for qx in qindexes if qx < len(available_qaids)]
        print('[get_test_qaids] Chose subset of size %d/%d' % (len(_test_qaids), len(available_qaids)))
        available_qaids = _test_qaids
        qaid_request_info['subset'] = qindexes

    if VERB_MAIN_HELPERS:
        print('[get_test_qaids] * len(available_qaids) = %r' % (len(available_qaids)))
        print('[get_test_qaids] L ___ GET_TEST_QAIDS ___')
    if return_annot_info:
        return available_qaids, qaid_request_info
    else:
        return available_qaids
Esempio n. 7
0
def ggr_random_name_splits():
    """
    CommandLine:
        python -m wbia.viz.viz_graph2 ggr_random_name_splits --show

    Ignore:
        sshfs -o idmap=user lev:/ ~/lev

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_graph2 import *  # NOQA
        >>> ggr_random_name_splits()
    """
    import wbia.guitool as gt

    gt.ensure_qtapp()
    # nid_list = ibs.get_valid_nids(filter_empty=True)
    import wbia

    dbdir = '/media/danger/GGR/GGR-IBEIS'
    dbdir = (dbdir if ut.checkpath(dbdir) else
             ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS'))
    ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)

    import datetime

    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    orig_filter_kw = {
        'multiple': None,
        # 'view': ['right'],
        # 'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union(
        orig_filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    ))
    orig_all_annots = ibs.annots(orig_aids)
    orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group(
        orig_all_annots.nids)
    # Ensure we get everything
    orig_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids)
    ]

    # pip install quantumrandom
    if False:
        import quantumrandom

        data = quantumrandom.uint16()
        seed = data.sum()
        print('seed = %r' % (seed, ))
        # import Crypto.Random
        # from Crypto import Random
        # quantumrandom.get_data()
        # StrongRandom = Crypto.Random.random.StrongRandom
        # aes.reseed(3340258)
        # chars = [str(chr(x)) for x in data.view(np.uint8)]
        # aes_seed = str('').join(chars)
        # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator()
        # aes.reseed(aes_seed)
        # aes.pseudo_random_data(10)

    orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258)
    orig_sample_size = 75
    random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs)
    orig_annot_sample = random_annot_groups[:orig_sample_size]

    # OOOPS MADE ERROR REDO ----

    filter_kw = {
        'multiple': None,
        'view': ['right'],
        'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    filter_kw_ = ut.dict_union(
        filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    )
    refiltered_sample = [
        ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)
        for annot in orig_annot_sample
    ]
    is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2
    ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok)
    ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample]

    # Now compute real sample
    aids = ibs.filter_annots_general(filter_kw=filter_kw_)
    all_annots = ibs.annots(aids)
    unique_nids, grouped_annots_ = all_annots.group(all_annots.nids)
    grouped_annots = grouped_annots_
    # Ensure we get everything
    # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)]

    pop = len(grouped_annots)
    pername_list = ut.lmap(len, grouped_annots)
    groups = wbia.annots.AnnotGroups(grouped_annots, ibs)
    match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags]
    tag_case_hist = ut.dict_hist(ut.flatten(match_tags))
    print('name_pop = %r' % (pop, ))
    print('Annots per Multiton Name' +
          ut.repr3(ut.get_stats(pername_list, use_median=True)))
    print('Name Tag Hist ' + ut.repr3(tag_case_hist))
    print('Percent Photobomb: %.2f%%' %
          (tag_case_hist['photobomb'] / pop * 100))
    print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100))

    # Remove the ok part from this sample
    remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids)
    remain_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids)
    ]

    sample_size = 75
    import vtool as vt

    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05)

    remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots),
                                         seed=3340258)
    remain_sample_size = sample_size - len(ok_part_orig_nids)
    remain_random_annot_groups = ut.take(remain_grouped_annots,
                                         remain_rand_idxs)
    remain_annot_sample = remain_random_annot_groups[:remain_sample_size]

    annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample
    # Filter out all bad parts
    annot_sample_filter = [
        ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_))
        for annot in annot_sample_nofilter
    ]
    annot_sample = annot_sample_filter

    win = None
    from wbia.viz import viz_graph2

    for annots in ut.InteractiveIter(annot_sample):
        if win is not None:
            win.close()
        win = viz_graph2.make_qt_graph_interface(ibs,
                                                 aids=annots.aids,
                                                 init_mode='rereview')
        print(win)

    sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs)

    flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags]

    print('Using Split and Photobomb')
    is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only Photobomb')
    is_positive = ['photobomb' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only SplitCase')
    is_positive = ['splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)
Esempio n. 8
0
def random_xy_sample(X, y, size_, data_per_label, seed=0):
    label_indicies = ut.random_indexes(len(y), seed=seed)[0:size_]
    data_indicies = expand_data_indicies(label_indicies, data_per_label)
    X_subset = X.take(data_indicies, axis=0)
    y_sbuset = y.take(label_indicies, axis=0)
    return X_subset, y_sbuset