コード例 #1
0
def show_toy_distributions(toy_params):
    import vtool as vt
    import plottool as pt
    pt.ensure_pylab_qt4()
    xdata = np.linspace(0, 8, 1000)
    tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
    fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
    pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'],
                          prob_colors=[pt.TRUE_BLUE, pt.FALSE_RED],
                          xdata=xdata,
                          figtitle='Toy Distributions')
コード例 #2
0
ファイル: demobayes.py プロジェクト: heroinlin/ibeis
def show_toy_distributions(toy_params):
    import vtool as vt
    import plottool as pt
    pt.ensure_pylab_qt4()
    xdata = np.linspace(0, 8, 1000)
    tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
    fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
    pt.plot_probabilities(
        [tp_pdf, fp_pdf], ['TP', 'TF'],
        prob_colors=[pt.TRUE_BLUE, pt.FALSE_RED],
        xdata=xdata,
        figtitle='Toy Distributions')
コード例 #3
0
ファイル: _grave.py プロジェクト: Erotemic/vtool
    def visualize(encoder):
        import plottool as pt

        # is_timedata = False
        is_timedelta = True
        p_xdata = encoder.p_xdata
        xdata_domain = encoder.xdata_domain
        # if is_timedata:
        #    xdata_domain_ = [ut.unixtime_to_datetimeobj(unixtime) for unixtime in xdata_domain]
        if is_timedelta:
            # xdata_domain_ = [ut.unixtime_to_timedelta(unixtime) for unixtime in xdata_domain]
            pass
        else:
            pass
            # xdata_domain_ = xdata_domain
        pt.plot_probabilities([p_xdata], [""], xdata=xdata_domain)
        ax = pt.gca()

        # HISTOGRAM
        if False:
            X = encoder.support["X"]
            xdata = X[~np.isnan(X)]
            n, bins, patches = pt.plt.hist(xdata, 1000)

        ax.set_xlabel("xdata")
        if is_timedelta:
            ax.set_xlabel("Time Delta")
            ax.set_title("Timedelta distribution")

            def timeTicks(x, pos):
                import datetime

                d = datetime.timedelta(seconds=x)
                return str(d)

            import matplotlib as mpl

            formatter = mpl.ticker.FuncFormatter(timeTicks)
            ax.xaxis.set_major_formatter(formatter)
            pt.gcf().autofmt_xdate()
コード例 #4
0
ファイル: _grave.py プロジェクト: SU-ECE-18-7/vtool
    def visualize(encoder):
        import plottool as pt
        #is_timedata = False
        is_timedelta = True
        p_xdata = encoder.p_xdata
        xdata_domain = encoder.xdata_domain
        #if is_timedata:
        #    xdata_domain_ = [ut.unixtime_to_datetimeobj(unixtime) for unixtime in xdata_domain]
        if is_timedelta:
            #xdata_domain_ = [ut.unixtime_to_timedelta(unixtime) for unixtime in xdata_domain]
            pass
        else:
            pass
            #xdata_domain_ = xdata_domain
        pt.plot_probabilities([p_xdata], [''], xdata=xdata_domain)
        ax = pt.gca()

        # HISTOGRAM
        if False:
            X = encoder.support['X']
            xdata = X[~np.isnan(X)]
            n, bins, patches = pt.plt.hist(xdata, 1000)

        ax.set_xlabel('xdata')
        if is_timedelta:
            ax.set_xlabel('Time Delta')
            ax.set_title('Timedelta distribution')

            def timeTicks(x, pos):
                import datetime
                d = datetime.timedelta(seconds=x)
                return str(d)

            import matplotlib as mpl
            formatter = mpl.ticker.FuncFormatter(timeTicks)
            ax.xaxis.set_major_formatter(formatter)
            pt.gcf().autofmt_xdate()
コード例 #5
0
ファイル: _grave_bayes.py プロジェクト: heroinlin/ibeis
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem representing attempting to discover names via annotation
    # scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool as vt
    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {'mu': 1.0, 'sigma': 2.2},
        False: {'mu': 7.0, 'sigma': .9}
    }

    if True:
        import vtool as vt
        import plottool as pt
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        print(ut.repr2(pairwise_scores_mat, precision=1))

    #aids = list(range(num_annots))
    #g = netx.DiGraph()
    #g.add_nodes_from(aids)
    #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    #netx.draw_graphviz(g)
    #pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn
    bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, ('[enc] bandwidth is 0. Cannot cluster')
    # bandwidth is with respect to the RBF used in clustering
    #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    #)

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))
    #X_data, seconds_thresh, criterion='distance')

    #help(hdbscan.HDBSCAN)

    import hdbscan
    alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))

    #import ddbscan
    #help(ddbscan.DDBSCAN)
    #alg = ddbscan.DDBSCAN(2, 2)

    #D = np.zeros((len(aids), len(aids) + 1))
    #D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    #hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow
        #from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])
コード例 #6
0
ファイル: crf.py プロジェクト: whaozl/ibeis
def crftest():
    """
    pip install pyqpbo
    pip install pystruct

    http://taku910.github.io/crfpp/#install

    cd ~/tmp
    #wget https://drive.google.com/folderview?id=0B4y35FiV1wh7fngteFhHQUN2Y1B5eUJBNHZUemJYQV9VWlBUb3JlX0xBdWVZTWtSbVBneU0&usp=drive_web#list
    7z x CRF++-0.58.tar.gz
    7z x CRF++-0.58.tar
    cd CRF++-0.58
    chmod +x configure
    ./configure
    make

    """
    import pystruct
    import pystruct.models
    inference_method_options = ['lp', 'max-product']
    inference_method = inference_method_options[1]

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method=inference_method,
    #    class_weight=None,
    #    directed=False,
    #)

    num_annots = 5
    num_names = num_annots

    aids = np.arange(5)
    rng = np.random.RandomState(0)
    hidden_nids = rng.randint(0, num_names, num_annots)
    unique_nids, groupxs = ut.group_indices(hidden_nids)

    # Indicator vector indicating the name
    node_features = np.zeros((num_annots, num_names))
    node_features[(aids, hidden_nids)] = 1

    toy_params = {
        True: {
            'mu': 1.0,
            'sigma': 2.2
        },
        False: {
            'mu': 7.0,
            'sigma': .9
        }
    }
    if False:
        import vtool as vt
        import plottool as pt
        pt.ensure_pylab_qt4()
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array(
        [hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)

    graph = pystruct.models.EdgeFeatureGraphCRF(
        n_states=num_annots,
        n_features=num_names,
        n_edge_features=1,
        inference_method=inference_method,
    )

    import opengm

    numVar = 10
    unaries = np.ones([numVar, 3], dtype=opengm.value_type)
    gm = opengm.gm(np.ones(numVar, dtype=opengm.label_type) * 3)
    unary_fids = gm.addFunctions(unaries)
    gm.addFactors(unary_fids, np.arange(numVar))
    infParam = opengm.InfParam(workflow=ut.ensure_ascii('(IC)(TTC-I,CC-I)'), )
    inf = opengm.inference.Multicut(gm, parameter=infParam)
    visitor = inf.verboseVisitor(printNth=1, multiline=False)
    inf.infer(visitor)
    arg = inf.arg()

    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)
    # regularizer = opengm.pottsFunction([3, 3], 0.0, beta)
    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)

    unaries = np.random.rand(10, 10, 2)
    potts = opengm.PottsFunction([2, 2], 0.0, 0.4)
    gm = opengm.grid2d2Order(unaries=unaries, regularizer=potts)

    inf = opengm.inference.GraphCut(gm)
    inf.infer()
    arg = inf.arg()
コード例 #7
0
ファイル: crf.py プロジェクト: Erotemic/ibeis
def crftest():
    """
    pip install pyqpbo
    pip install pystruct

    http://taku910.github.io/crfpp/#install

    cd ~/tmp
    #wget https://drive.google.com/folderview?id=0B4y35FiV1wh7fngteFhHQUN2Y1B5eUJBNHZUemJYQV9VWlBUb3JlX0xBdWVZTWtSbVBneU0&usp=drive_web#list
    7z x CRF++-0.58.tar.gz
    7z x CRF++-0.58.tar
    cd CRF++-0.58
    chmod +x configure
    ./configure
    make

    """
    import pystruct
    import pystruct.models
    inference_method_options = ['lp', 'max-product']
    inference_method = inference_method_options[1]

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method=inference_method,
    #    class_weight=None,
    #    directed=False,
    #)

    num_annots = 5
    num_names = num_annots

    aids = np.arange(5)
    rng = np.random.RandomState(0)
    hidden_nids = rng.randint(0, num_names, num_annots)
    unique_nids, groupxs = ut.group_indices(hidden_nids)

    # Indicator vector indicating the name
    node_features = np.zeros((num_annots, num_names))
    node_features[(aids, hidden_nids)] = 1

    toy_params = {
        True: {'mu': 1.0, 'sigma': 2.2},
        False: {'mu': 7.0, 'sigma': .9}
    }
    if False:
        import vtool as vt
        import plottool as pt
        pt.ensure_pylab_qt4()
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)


    graph = pystruct.models.EdgeFeatureGraphCRF(
        n_states=num_annots,
        n_features=num_names,
        n_edge_features=1,
        inference_method=inference_method,
    )

    import opengm

    numVar = 10
    unaries = np.ones([numVar, 3], dtype=opengm.value_type)
    gm = opengm.gm(np.ones(numVar, dtype=opengm.label_type) * 3)
    unary_fids = gm.addFunctions(unaries)
    gm.addFactors(unary_fids, np.arange(numVar))
    infParam = opengm.InfParam(
        workflow=ut.ensure_ascii('(IC)(TTC-I,CC-I)'),
    )
    inf = opengm.inference.Multicut(gm, parameter=infParam)
    visitor = inf.verboseVisitor(printNth=1, multiline=False)
    inf.infer(visitor)
    arg = inf.arg()

    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)
    # regularizer = opengm.pottsFunction([3, 3], 0.0, beta)
    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)

    unaries = np.random.rand(10, 10, 2)
    potts = opengm.PottsFunction([2, 2], 0.0, 0.4)
    gm = opengm.grid2d2Order(unaries=unaries, regularizer=potts)

    inf = opengm.inference.GraphCut(gm)
    inf.infer()
    arg = inf.arg()
コード例 #8
0
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem representing attempting to discover names via annotation
    # scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool as vt
    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {
            'mu': 1.0,
            'sigma': 2.2
        },
        False: {
            'mu': 7.0,
            'sigma': .9
        }
    }

    if True:
        import vtool as vt
        import plottool as pt
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array(
        [hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        print(ut.repr2(pairwise_scores_mat, precision=1))

    #aids = list(range(num_annots))
    #g = netx.DiGraph()
    #g.add_nodes_from(aids)
    #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    #netx.draw_graphviz(g)
    #pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn
    bandwidth = sklearn.cluster.estimate_bandwidth(
        X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, ('[] bandwidth is 0. Cannot cluster')
    # bandwidth is with respect to the RBF used in clustering
    #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth,
                                   bin_seeding=True,
                                   cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1,
                                           max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    #)

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data,
                                                  thresh,
                                                  metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs), ))
    print('common groups = %r' %
          (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))
    #X_data, seconds_thresh, criterion='distance')

    #help(hdbscan.HDBSCAN)

    import hdbscan
    alg = hdbscan.HDBSCAN(metric=metric,
                          min_cluster_size=1,
                          p=1,
                          gen_min_span_tree=1,
                          min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs), ))
    print('common groups = %r' %
          (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))

    #import ddbscan
    #help(ddbscan.DDBSCAN)
    #alg = ddbscan.DDBSCAN(2, 2)

    #D = np.zeros((len(aids), len(aids) + 1))
    #D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    #hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow
        #from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])