Beispiel #1
0
def RandomLearner(X, y):
    """
    Create an active learner with the random query strategy and run it on the given data set.

    The estimator is an SVM classifier with default parameters, wrapped in a
    modAL ActiveLearner. The labeled pool initially holds two hand-picked
    points, one labeled 0 and one labeled 1. Every point of the unlabeled
    pool is queried exactly once, so each loop iteration costs one oracle label.

    Input:
    The data set X (two features per row) and the corresponding labels y.
    Return:
    The accuracies evaluated on X, y after each oracle query as a
    one-dimensional numpy array, and the number of data points that were
    queried from the oracle.
    """
    random_learner = ActiveLearner(estimator=SVC(),
                                   query_strategy=RandomQuery,
                                   X_training=np.array([[0.5, 4.0],
                                                        [2.0, 1.0]]),
                                   y_training=np.array([[0], [1]]))

    accuracies = []

    # The unlabeled pool shrinks as points are queried; work on copies so the
    # caller's X and y stay available, unchanged, for scoring.
    U, ULabels = copy.deepcopy(X), copy.deepcopy(y)

    while len(U) != 0:
        idx, _ = random_learner.query(U)
        # teach() is the public modAL call that appends the sample to the
        # training data and refits the estimator on everything known so far.
        # Labels are passed as a column to match the shape of the seed labels.
        random_learner.teach(U[idx].reshape(1, 2),
                             ULabels[idx].reshape(1, 1))
        U, ULabels = np.delete(U, idx, axis=0), np.delete(ULabels, idx, axis=0)
        accuracies.append(random_learner.score(X, y))

    # One accuracy is recorded per oracle query, so the list length is the
    # number of queries.
    return np.array(accuracies), len(accuracies)
Beispiel #2
0
def CALLearner(X, y):
    """
    Create an active learner with the CAL query strategy and run it on the given data set.

    The estimator is an SVM classifier with default parameters. The labeled
    pool S / SLabels initially holds two points, one labeled 0 and one
    labeled 1. Each iteration moves one point from the unlabeled pool into the
    labeled pool together with the label returned by the query strategy; an
    accuracy is recorded only when that label was obtained from the oracle.

    Input:
    The data set X (two features per row) and the corresponding labels y.
    Return:
    The accuracies evaluated on X, y using the model fitted on the labeled
    data so far, recorded whenever the true label of a data point is queried
    from the oracle, as a one-dimensional numpy array, and the number of data
    points that were queried from the oracle.
    """
    seed_X = np.array([[0.5, 4.0], [2.0, 1.0]])
    seed_y = np.array([[0], [1]])

    CAL_learner = ActiveLearner(estimator=SVC(),
                                query_strategy=CAL,
                                X_training=seed_X,
                                y_training=seed_y)

    # In the worst case every point of the unlabeled pool has to be queried.
    n_queries = len(y)
    # Number of labels requested from the oracle so far.
    i = 0
    accuracies = []

    # S mirrors the learner's labeled pool and is handed to the query
    # strategy; separate copies keep it independent of the learner's arrays.
    S, SLabels = seed_X.copy(), seed_y.copy()
    U, ULabels = copy.deepcopy(X), copy.deepcopy(y)

    while i < n_queries and len(U) != 0:
        idx, y_idx, is_queried = CAL_learner.query(S, SLabels, U, ULabels)
        new_x = U[idx].reshape(1, 2)
        new_y = y_idx.reshape(1, 1)

        # teach() appends the sample to the training data and refits the
        # estimator on everything known so far.
        CAL_learner.teach(new_x, new_y)

        S, SLabels = np.vstack((S, new_x)), np.vstack((SLabels, new_y))
        U, ULabels = np.delete(U, idx, axis=0), np.delete(ULabels, idx)

        # Scoring the whole data set is only needed when the result is
        # recorded, i.e. when the oracle was actually asked.
        if is_queried:
            i += 1
            accuracies.append(CAL_learner.score(X, y))

    return np.array(accuracies), i
Beispiel #3
0
def RandomLearner(X, y):
    """
    Create an active learner with the random query strategy and run it on the given data set.

    The estimator is an SVM classifier (gamma='scale') wrapped in a modAL
    ActiveLearner. The labeled pool initially holds two points, one labeled 0
    and one labeled 1. Every iteration queries one point from the unlabeled
    pool, so each iteration costs one oracle label.

    Input:
    The data set X (two features per row) and the corresponding labels y.
    Return:
    The accuracies evaluated on X, y after each oracle query as a
    one-dimensional numpy array, and the number of data points that were
    queried from the oracle.
    """
    random_learner = ActiveLearner(estimator=SVC(gamma='scale'),
                                   query_strategy=RandomQuery,
                                   X_training=np.array([[0.5, 4.0],
                                                        [2.0, 1.0]]),
                                   y_training=np.array([[0], [1]]))

    accuracies = []
    n_queries = len(y)
    i = 0

    # np.delete returns new arrays, so the caller's X and y are never modified
    # even though the pool starts out as the very same objects.
    U, ULabels = X, y

    # Stop once the query budget is spent or the pool is exhausted; checking
    # the pool up front also avoids querying an empty pool.
    while i < n_queries and len(U):
        query_idx, _ = random_learner.query(U)

        # teach() appends the sample to the training data and refits the
        # estimator on everything known so far.
        random_learner.teach(U[query_idx, :].reshape(-1, 2),
                             ULabels[query_idx].reshape(-1, 1))

        # Accuracy of the refitted estimator on the entire data set.
        accuracies.append(random_learner.score(X, y))
        i += 1

        # Remove the queried point from the unlabeled pool.
        U = np.delete(U, query_idx, axis=0)
        ULabels = np.delete(ULabels, query_idx)

    return np.array(accuracies), i
Beispiel #4
0
def DHMLearner(X, y):
    """
    Create an active learner with the DHM query strategy and run it on the given data set.

    The estimator is an SVM classifier (gamma='scale'). Three pools are
    maintained and handed to the query strategy on every iteration:

    - S / SLabels: points whose label was NOT requested from the oracle,
      stored with the label returned by the query strategy. It initially
      holds two points, one labeled 0 and one labeled 1.
    - T / TLabels: points whose true label was requested from the oracle.
      Both are None until the first oracle query.
    - U / ULabels: the remaining unlabeled pool (ULabels plays the oracle).

    Input:
    The data set X (two features per row) and the corresponding labels y.
    Return:
    The accuracies evaluated on X, y using the model fitted on the labeled
    data so far, recorded whenever the true label of a data point is queried
    from the oracle, as a one-dimensional numpy array, and the number of data
    points that were queried from the oracle.
    """
    clf = SVC(gamma='scale')
    DHM_learner = ActiveLearner(estimator=clf,
                                query_strategy=DHM,
                                X_training=np.array([[0.5, 4.0], [2.0, 1.0]]),
                                y_training=np.array([[0], [1]]))

    # In the worst case every point of the unlabeled pool has to be queried.
    n_queries = len(y)
    # Number of labels requested from the oracle so far.
    i = 0
    accuracies = []

    S = np.array([[0.5, 4.0], [2.0, 1.0]])
    SLabels = np.array([[0], [1]])
    T, TLabels = None, None
    # np.delete returns new arrays, so the caller's X and y are never modified.
    U, ULabels = X, y

    # Inferred points do not consume query budget, so the loop may run until
    # the pool is exhausted; checking the pool up front also avoids querying
    # an empty pool.
    while i < n_queries and len(U):
        query_idx, query_label, is_queried = DHM_learner.query(
            S, SLabels, T, TLabels, U, ULabels)

        new_x = U[query_idx, :].reshape(-1, 2)
        new_y = query_label.reshape(-1, 1)

        # teach() appends the sample to the training data and refits the
        # estimator on everything known so far.
        DHM_learner.teach(new_x, new_y)

        if is_queried:
            # The oracle was asked: record the point with its true label in T.
            oracle_label = ULabels[query_idx:query_idx + 1]
            if TLabels is None:
                T, TLabels = new_x, oracle_label
            else:
                T = np.vstack((T, new_x))
                TLabels = np.vstack((TLabels, oracle_label))

            # Scoring the whole data set is only needed when the result is
            # recorded, i.e. when the oracle was actually asked.
            accuracies.append(DHM_learner.score(X, y))
            i += 1
        else:
            # Not queried: S must carry the same label the learner was trained
            # on (the one returned by the query strategy), not the oracle's
            # label, which was never requested for this point.
            S = np.vstack((S, new_x))
            SLabels = np.vstack((SLabels, new_y))

        # Remove the processed point from the unlabeled pool.
        U = np.delete(U, query_idx, axis=0)
        ULabels = np.delete(ULabels, query_idx)

    return np.array(accuracies), i