Example #1
0
def test_alpha_deprecation():
    """Check that an explicit ``alpha=0`` warns yet yields the same labels."""
    X, y = make_classification(n_samples=100)
    # Hide every third target so there is something to propagate.
    y[::3] = -1

    shared_params = dict(kernel='rbf', gamma=0.1)

    # Reference model: alpha is left at its default value.
    baseline = label_propagation.LabelPropagation(**shared_params)
    baseline_labels = baseline.fit(X, y).transduction_

    # Same model with alpha passed explicitly; fit() must raise a
    # DeprecationWarning, and assert_warns hands back fit()'s return value.
    with_alpha = label_propagation.LabelPropagation(alpha=0, **shared_params)
    fitted = assert_warns(DeprecationWarning, with_alpha.fit, X, y)

    assert_array_equal(baseline_labels, fitted.transduction_)
def test_label_propagation_closed_form():
    """Compare the fitted label distributions with a closed-form solution."""
    n_classes = 2
    X, y = make_classification(n_classes=n_classes,
                               n_samples=200,
                               random_state=0)
    y[::3] = -1

    # One-hot encode the targets with one spare trailing column: a label of
    # -1 indexes that last column, so it flags the unlabelled samples.
    n_samples = len(y)
    one_hot = np.zeros((n_samples, n_classes + 1))
    one_hot[np.arange(n_samples), y] = 1
    missing_flag = one_hot[:, (-1, )]
    unlabelled_idx = missing_flag.nonzero()[0]
    labelled_idx = (missing_flag == 0).nonzero()[0]

    clf = label_propagation.LabelPropagation(max_iter=10000, gamma=0.1)
    clf.fit(X, y)

    # adopting notation from Zhu et al 2002
    T_bar = clf._build_graph()
    uu_rows, uu_cols = np.meshgrid(unlabelled_idx, unlabelled_idx,
                                   indexing='ij')
    ul_rows, ul_cols = np.meshgrid(unlabelled_idx, labelled_idx,
                                   indexing='ij')
    Tuu = T_bar[uu_rows, uu_cols]
    Tul = T_bar[ul_rows, ul_cols]

    # Drop the helper column again before doing any arithmetic.
    class_targets = one_hot[:, :-1]
    Y_l = class_targets[labelled_idx, :]
    inverse_term = np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu)
    Y_u = np.dot(np.dot(inverse_term, Tul), Y_l)

    # Labelled rows keep their one-hot targets; unlabelled rows receive the
    # closed-form result, and every row is normalised to sum to one.
    expected = class_targets.copy()
    expected[unlabelled_idx, :] = Y_u
    expected /= expected.sum(axis=1)[:, np.newaxis]

    assert_array_almost_equal(expected, clf.label_distributions_, 4)
def test_convergence_warning():
    """Check when LabelSpreading/LabelPropagation emit ConvergenceWarning."""
    # This is a non-regression test for #5774
    X = np.array([[1., 0.], [0., 1.], [1., 2.5]])
    y = np.array([0, 1, -1])
    estimators = (label_propagation.LabelSpreading,
                  label_propagation.LabelPropagation)

    # With a single allowed iteration, fit() must warn and report that it
    # used the whole iteration budget.
    for estimator in estimators:
        mdl = estimator(kernel='rbf', max_iter=1)
        assert_warns(ConvergenceWarning, mdl.fit, X, y)
        assert mdl.n_iter_ == mdl.max_iter

    # With 500 iterations allowed, fit() must stay silent.
    for estimator in estimators:
        mdl = estimator(kernel='rbf', max_iter=500)
        assert_no_warnings(mdl.fit, X, y)
Example #4
0
def train(X, y, y_train, labels, graph, output_file, top_n=100,
          target_promoted=5522):
    """Iteratively self-train a LabelPropagation model by promoting samples.

    Each epoch fits a fresh knn-kernel ``LabelPropagation`` on ``X`` with the
    current ``y_train``. For every class, up to ``top_n`` still-unlabeled
    samples predicted as that class are promoted: their entry in ``y_train``
    is overwritten with the class id. The samples with the smallest
    ``entropy`` score are chosen first. The promoted nodes are printed
    tab-separated per class, and the whole history is written to
    ``output_file`` at the end.

    Args:
        X: feature matrix passed to ``LabelPropagation.fit``.
        y: only its length is used, to build the sample index array.
        y_train: array of class ids with -1 for unlabeled samples.
            It is modified in place as samples are promoted.
        labels: class names; the position of a name is its class id.
        graph: object exposing ``graph.entities.get_node(i)``. The returned
            nodes are joined with tabs, so they are presumably strings
            (TODO confirm).
        output_file: path of the text file that receives the history.
        top_n: maximum number of samples promoted per class and epoch.
        target_promoted: the loop stops once at least this many samples
            (seeds included) have been promoted.

    Note:
        ``print_state``, ``entropy`` and ``pre_func`` are defined elsewhere;
        their exact behaviour is not visible from this function.
    """
    indices = np.arange(len(y))
    total_promoted, epoch = 0, 1
    outputs = [['Epoch 0']]

    # Epoch 0: report the seed nodes that are already labeled, per class.
    for _id, label in enumerate(labels):
        seed_nodes = [
            graph.entities.get_node(i) for i in indices[y_train == _id]
        ]
        total_promoted += len(seed_nodes)
        outputs.append([label] + seed_nodes)
        print('\t'.join([label] + seed_nodes))

    print_state(epoch, labels, total_promoted, y_train)
    while total_promoted < target_promoted:
        outputs.append(['Epoch %d' % epoch])
        model = label_propagation.LabelPropagation(kernel='knn',
                                                   tol=0.01,
                                                   max_iter=2000,
                                                   n_jobs=16)
        model.fit(X, y_train)

        predictions = model.transduction_
        confidences = entropy(model.label_distributions_.T)

        promoted_this_epoch = 0
        for _id, label in enumerate(labels):
            # Candidates: predicted as this class and not yet labeled.
            mask = np.logical_and(predictions == _id, y_train == -1)
            candidates = indices[mask]
            # Ascending sort, so the lowest-scoring candidates come first.
            promoted = candidates[np.argsort(confidences[mask])][:top_n]
            y_train[promoted] = _id

            promoted_nodes = [graph.entities.get_node(i) for i in promoted]
            print('\t'.join([label] + promoted_nodes))
            promoted_this_epoch += len(promoted)
            outputs.append([label] + promoted_nodes)

        total_promoted += promoted_this_epoch
        print_state(epoch, labels, total_promoted, predictions)
        epoch += 1

        # Without new promotions y_train is unchanged, so every later epoch
        # would repeat this one and the target could never be reached.
        if promoted_this_epoch == 0:
            break

    with open(output_file, 'w') as f:
        for line in pre_func(outputs):
            f.write(line + '\n')
Example #5
0
def run_US(X):
    """Search the candidate parameters with uncertainty sampling.

    The first ``init_rndm`` candidates are picked at random. After that, as
    long as no successful parameter has been found, picking stays random.
    Once at least one has been found, a ``LabelPropagation`` model is fitted
    on the success/failure labels collected so far, and the unlabeled
    candidate whose most likely class has the lowest probability is
    simulated next. Progress is printed after every simulation.

    Relies on names defined outside this function: ``para_shape``,
    ``num_param``, ``param_data``, ``init_rndm``, ``iteration``,
    ``simulator``, ``isTest`` and ``success_threshold``.

    Args:
        X: feature matrix of the candidates; row ``i`` is assumed to
            describe ``param_data[i]`` (TODO confirm against the caller).

    Returns:
        None. Results are only reported through ``print``.
    """
    # Standardise the features, then scale each axis by
    # para_shape / max(para_shape).
    std_X = StandardScaler().fit(X).transform(X)
    shape = np.array(para_shape)
    std_X = std_X * (shape / float(np.max(shape)))

    rndm_para_index_list = list(range(num_param))
    random.shuffle(rndm_para_index_list)

    detected_SP = []
    # Kept in ascending order; shrinks by one entry per simulation.
    unlabeled_index_list = list(range(num_param))
    # -1 = not simulated yet, 1 = success, 0 = failure.
    phase_list = [-1] * num_param

    def evaluate(index):
        """Simulate candidate ``index`` and record its outcome as a label."""
        next_param = param_data[index]
        #Call simulator
        success_rate = simulator.simulation(next_param, test=isTest)
        succeeded = success_rate >= success_threshold
        if succeeded:
            detected_SP.append(next_param)
        unlabeled_index_list.remove(index)
        phase_list[index] = 1 if succeeded else 0
        return next_param, success_rate

    for it in range(init_rndm):
        next_param, _ = evaluate(rndm_para_index_list[it])
        print('iteration:', it + 1, 'checked param:', next_param,
              'Num. of SPs', len(detected_SP))

    for it in range(init_rndm, iteration):
        if not unlabeled_index_list:
            # Every candidate has been simulated; nothing left to query.
            break

        if not detected_SP:
            # Only failures so far: keep drawing from the shuffled order.
            # All earlier picks were random too, so position ``it`` is unused.
            next_param, _ = evaluate(rndm_para_index_list[it])
            print('iteration:', it + 1, 'checked param:', next_param,
                  'Num. of SPs', len(detected_SP))
        else:
            # Labels are passed as floats, with -1 marking unlabeled rows.
            label_train = np.array(phase_list, dtype=float)
            lp_model = label_propagation.LabelPropagation()
            lp_model.fit(std_X[:num_param], label_train)
            label_distributions = lp_model.label_distributions_[
                unlabeled_index_list]

            # Least-confident criterion: highest 1 - max class probability.
            most_uncertain = unlabeled_index_list[np.argmax(
                1 - np.max(label_distributions, axis=1))]

            next_param, success_rate = evaluate(most_uncertain)
            print('iteration:', it + 1, 'checked param:', next_param,
                  'success rate:', success_rate, 'Num. of SPs:',
                  len(detected_SP))
Example #6
0
def query_next_data_points(X: np.array,
                           Y: np.array,
                           label_presence=None,
                           algorithm='LP',
                           kernel='rbf',
                           gamma=20,
                           n_neighbors=7,
                           max_iter=1000,
                           tol=0.001,
                           n_jobs=None,
                           alpha=0.2,
                           US_strategy='E'):
    """
    Suggests the next data points for sampling.

    One classifier is fitted per task (column of Y). The per-task class
    probabilities of the unlabeled points are combined and up to 5 points
    are selected according to ``US_strategy``.

    Args:
        X: n by d numpy array where n is the number of data points and d is
        the dimension of each data point.
        Y: the labels for X. 1d array or n by t numpy array where t is the
        number of tasks for multitasking. -1 means missing data points.
        Y itself is not modified; a copy is used internally.
        label_presence: a list-like object of boolean that tells which data
        points lack labels (False means unlabeled). If None, a data point
        counts as unlabeled when all of its tasks are -1 in Y.
        algorithm: classifier used for label propagation. One of ['LP',
        'LS', 'SVM'] for sklearn.semi_supervised.LabelPropagation,
        sklearn.semi_supervised.LabelSpreading, or sklearn.svm.SVC,
        respectively.
        kernel: the kernel used by 'LP' and 'LS'. See options in sklearn doc.
        Not passed to the SVM.
        gamma: Kernel coefficient, passed to all three algorithms.
        n_neighbors: Parameter for knn kernel ('LP' and 'LS' only).
        max_iter: maximum number of iterations allowed ('LP' and 'LS' only).
        tol: convergence tolerance ('LP' and 'LS' only).
        n_jobs: the number of parallel jobs to run ('LP' and 'LS' only).
        For 'LP', None is replaced by 1.
        alpha: passed to LabelSpreading only.
        US_strategy: uncertainty sampling strategy. One of
        ['MS', 'LC', 'E', 'RS']: smallest margin between the two most
        probable classes, least confident, highest summed entropy, or
        random sampling (no model is fitted for 'RS').
    Returns:
        The index of next data points in X and Y suggested by the algorithm
        (at most 5).
    Raises:
        ValueError: if US_strategy is unknown, or if algorithm is unknown
        and a model is needed (any strategy other than 'RS').

    """
    if US_strategy not in ('E', 'LC', 'MS', 'RS'):
        raise ValueError(
            "US_strategy must be one of 'MS', 'LC', 'E', 'RS'; got %r" %
            (US_strategy, ))

    _Y = Y.copy()
    if _Y.ndim == 1:
        _Y = np.expand_dims(_Y, 1)
    task_num = _Y.shape[1]

    if label_presence is None:
        unlabeled_indices = np.nonzero((_Y == -1).all(axis=1))[0]
    else:
        unlabeled_indices = np.nonzero(np.logical_not(label_presence))[0]
        # Hide whatever labels Y carries for the points flagged as missing.
        _Y[unlabeled_indices] = -1

    if US_strategy == 'RS':
        # Random sampling needs no model at all.
        return np.random.permutation(unlabeled_indices)[:5]

    if algorithm not in ('LP', 'LS', 'SVM'):
        raise ValueError("algorithm must be one of 'LP', 'LS', 'SVM'; got %r"
                         % (algorithm, ))

    # One array of class probabilities (unlabeled points only) per task.
    label_distributions = []
    for task_index in range(task_num):
        task_labels = _Y[:, task_index]
        if algorithm == 'SVM':
            # The SVM is supervised: train it on the labeled points only.
            has_label = task_labels != -1
            model = svm.SVC(probability=True, C=10, gamma=gamma)
            model.fit(X[has_label], task_labels[has_label])
            label_distributions.append(
                model.predict_proba(X[unlabeled_indices]))
            continue

        if algorithm == 'LS':
            # Keyword arguments: recent scikit-learn releases reject
            # positional hyper-parameters here.
            model = label_propagation.LabelSpreading(kernel=kernel,
                                                     gamma=gamma,
                                                     n_neighbors=n_neighbors,
                                                     alpha=alpha,
                                                     max_iter=max_iter,
                                                     tol=tol,
                                                     n_jobs=n_jobs)
        else:
            model = label_propagation.LabelPropagation(
                kernel=kernel,
                gamma=gamma,
                n_neighbors=n_neighbors,
                max_iter=max_iter,
                tol=tol,
                n_jobs=1 if n_jobs is None else n_jobs)
        model.fit(X, task_labels)
        label_distributions.append(
            model.label_distributions_[unlabeled_indices])

    # select up to 5 examples that the classifier is most uncertain about
    if US_strategy == 'E':
        # Sum the per-task entropies; the largest sums are the most uncertain.
        pred_entropies = np.sum(np.vstack([
            stats.distributions.entropy(distribution.T)
            for distribution in label_distributions
        ]), axis=0)
        return unlabeled_indices[np.argsort(pred_entropies)[-5:]]

    # 'LC' and 'MS' both work on the class probabilities averaged over tasks.
    mean_distributions = np.stack(label_distributions).mean(axis=0)

    if US_strategy == 'LC':
        # Least confident: the top class probability is the lowest.
        u_score_list = 1 - np.max(mean_distributions, axis=1)
        return unlabeled_indices[np.argsort(u_score_list)[-5:]]

    # 'MS': smallest margin between the two most probable classes.
    ascending = np.sort(mean_distributions, axis=1)
    margins = ascending[:, -1] - ascending[:, -2]
    return unlabeled_indices[np.argsort(margins)[:5]]
def plot(X, y, lstStrCategories):
    """Plot decision regions of label spreading/propagation on 2-D PCA data.

    X is projected onto its first two principal components. Four models are
    fitted on that projection and drawn in a 2x2 grid, each showing the
    predicted regions and the training points coloured by label. The four
    models are LabelSpreading on two partially hidden copies of the labels
    and on the full labels, plus LabelPropagation on the full labels. A fifth
    LabelSpreading model is fitted on a third hidden copy and its
    predictions for X are printed.

    Args:
        X: feature matrix; reduced to two PCA components before fitting.
        y: labels for X, used as keys into the colour table. Presumably an
            integer array whose values index ``CategoryStr.lstStrColors``
            (TODO confirm); -1 is used internally for hidden labels.
        lstStrCategories: legend labels for the filled contour levels.
    """
    X = decomposition.PCA(n_components=2).fit(X).transform(X)

    rng = np.random.RandomState(0)

    # The three masks are drawn in this order from the same seeded stream.
    y_30 = np.copy(y)
    y_30[rng.rand(len(y)) < 0.3] = -1
    y_50 = np.copy(y)
    y_50[rng.rand(len(y)) < 0.5] = -1
    y_75 = np.copy(y)
    # NOTE(review): named "75" but the threshold is 0.8 -- confirm intent.
    y_75[rng.rand(len(y)) < 0.8] = -1

    ls50 = (label_propagation.LabelSpreading().fit(X, y_50), y_50)
    ls75 = (label_propagation.LabelSpreading().fit(X, y_75), y_75)
    ls100 = (label_propagation.LabelSpreading().fit(X, y), y)
    lp100 = (label_propagation.LabelPropagation().fit(X, y), y)

    clfLabelSpread = label_propagation.LabelSpreading()
    clfLabelSpread.fit(X, y_30)

    # Mesh covering the projected data with a margin of 1 on every side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                         np.arange(y_min, y_max, .02))
    mesh_points = np.c_[xx.ravel(), yy.ravel()]

    titles = [
        'Label Spreading 50%', 'Label Spreading 75%', 'Label Spreading 100%',
        'Label Propagation 100%'
    ]

    # Hidden labels (-1) are drawn in white; label i uses the i-th colour.
    color_map = {-1: (1, 1, 1)}
    for category, color_name in enumerate(CategoryStr.lstStrColors):
        color_map[category] = colorConverter.to_rgb(color_name)

    cs = None

    for i, (clf, y_train) in enumerate((ls50, ls75, ls100, lp100)):
        plt.subplot(2, 2, i + 1)
        Z = clf.predict(mesh_points).reshape(xx.shape)

        colors = [color_map[label] for label in y_train]

        # The invalid 'c' keyword was dropped: contourf takes its colours
        # from cmap.
        cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
        #plt.axis('off')
        # NOTE(review): fixed view limits, although the mesh spans the
        # data range -- confirm this zoom is intended.
        plt.ylim(-1, 1)
        plt.xlim(-1, 1)

        # Explicit RGB colours are given, so no colormap is involved.
        plt.scatter(X[:, 0], X[:, 1], c=colors, s=80)

        plt.title(titles[i])

    # Legend handles for the filled levels of the last subplot.
    # ContourSet.collections no longer exists in current Matplotlib.
    proxy, _ = cs.legend_elements()

    matPredictRes = clfLabelSpread.predict(X)
    print('matPredictRes: ', matPredictRes)

    plt.legend(proxy, lstStrCategories)
    plt.show()
Example #8
0
        cm.rainbow(float(i) / (max_label)) for i in range(max_label + 1)
    ]

    ss = StandardScaler()
    ss.fit(data_list)
    data_list_std = ss.transform(data_list)

    #----SAMPLING
    label_train = np.copy(label_list)
    #label_train[unlabeled_index_list] = -1

    #estimate phase of each point
    if LP_algorithm == 'LS':
        lp_model = label_propagation.LabelSpreading()
    elif LP_algorithm == 'LP':
        lp_model = label_propagation.LabelPropagation()

    lp_model.fit(data_list_std, label_train)
    predicted_labels = lp_model.transduction_[unlabeled_index_list]
    predicted_all_labels = lp_model.transduction_
    label_distributions = lp_model.label_distributions_[unlabeled_index_list]
    label_distributions_all = lp_model.label_distributions_
    classes = lp_model.classes_

    #print(label_train, classes,  predicted_labels, predicted_all_labels, label_distributions)

    #calculate Uncertainly Score
    if US_strategy == 'E':
        pred_entropies = stats.distributions.entropy(label_distributions.T)
        u_score_list = pred_entropies / np.max(pred_entropies)
        if parameter_constraint:
Example #9
0
]

# RGB colour per label; -1 (unlabeled) is drawn in white.
color_map = {
    -1: (1, 1, 1),
    0: (0, 0, .9),
    1: (1, 0, 0),
    2: (.8, .6, 0),
}

# Every classifier is evaluated on the same mesh [x_min, x_max]x[y_min, y_max],
# so the query points are built once.
mesh_points = np.c_[xx.ravel(), yy.ravel()]

for i, (clf, y_train) in enumerate((ls30, ls50, ls100, rbf_svc)):
    plt.subplot(2, 2, i + 1)

    # Decision regions: predict a class for each mesh point and draw the
    # result as filled contours.
    Z = clf.predict(mesh_points).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.axis('off')

    # Training points on top, coloured by the label used for fitting.
    colors = [color_map[label] for label in y_train]
    plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')

    plt.title(titles[i])

plt.suptitle("Unlabeled points are colored white", y=0.1)
plt.show()

print(X)
print(y_30)
# Fit each semi-supervised estimator on the partially labeled targets and
# print its predictions for the training points.
for estimator_cls in (label_propagation.LabelSpreading,
                      label_propagation.LabelPropagation):
    y_pred = estimator_cls().fit(X, y_30).predict(X)
    print(y_pred)