def test_check_zero(self):
    """Check that check_zero never hands back a value equal to zero.

    The probe values are an integer zero, a float zero and a very small
    positive float (1e-23); each result must compare unequal to 0.
    """
    for candidate in (0, 0.0, 1e-23):
        guarded = check_zero(candidate)
        self.assertEqual(guarded != 0., True)
Beispiel #2
0
def KappaMulti(ha, hb, y):
    """Compute the kappa agreement statistic between two prediction vectors.

    A contingency table ``Cij`` is built over every label that occurs in
    ``y``, ``ha`` or ``hb``; entry ``[i, j]`` counts the samples for which
    ``ha`` predicts label ``i`` and ``hb`` predicts label ``j``.

    Args:
        ha: predictions of the first classifier (array-like).
        hb: predictions of the second classifier (array-like).
        y: reference labels; ``len(y)`` is used as the sample count ``m``.

    Returns:
        Tuple ``(kappa, theta1, theta2)`` where ``theta1`` is the fraction
        of samples on the table diagonal, ``theta2`` is the sum over labels
        of (row total * column total) / m**2, and
        ``kappa = (theta1 - theta2) / check_zero(1 - theta2)``.
    """
    ha = np.array(ha)
    hb = np.array(hb)
    vY = np.unique(np.concatenate([y, ha, hb]))
    dY = len(vY)  # number of labels / classes
    m = len(y)  # number of instances / samples

    # construct the contingency table
    Cij = np.zeros(shape=(dY, dY))
    for i in range(dY):
        ha_is_i = (ha == vY[i])  # invariant over the inner loop
        for j in range(dY):
            Cij[i, j] = np.sum(ha_is_i & (hb == vY[j]))

    # observed agreement: mass on the diagonal
    theta1 = np.sum([Cij[i, i] for i in range(dY)]) / float(m)

    # Chance agreement needs the row and column marginals of the table.
    # The earlier code multiplied (C_ii + C_ij) terms together, which equals
    # the marginal only when there are exactly two labels; summing the whole
    # row / column is correct for any number of labels and gives the same
    # value in the two-label case.
    c_row_sum = np.sum(Cij, axis=1)
    c_col_sum = np.sum(Cij, axis=0)
    theta2 = np.sum(np.multiply(c_row_sum, c_col_sum)) / (float(m)**2)

    ans = (theta1 - theta2) / check_zero(1. - theta2)
    gc.collect()
    return ans, theta1, theta2
def KLD(p, q):
    """Accumulate sum_i p[i] * log(p[i] / q[i]) for two weight vectors.

    Both inputs are copied into DTY_FLT arrays. A vector whose sum is not
    exactly 1.0 is divided in place by check_zero(sum) first. Every divisor
    and every logarithm argument is passed through check_zero (defined
    elsewhere) before use.

    Args:
        p: first weight vector (array-like).
        q: second weight vector (array-like), indexed up to len(p).

    Returns:
        The accumulated sum.
    """
    p = np.array(p, dtype=DTY_FLT)
    q = np.array(q, dtype=DTY_FLT)

    # bring each vector to unit sum when it is not already there
    mass_p = np.sum(p)
    if mass_p != 1.0:
        p /= check_zero(mass_p)
    mass_q = np.sum(q)
    if mass_q != 1.0:
        q /= check_zero(mass_q)

    divergence = 0.
    for idx in range(len(p)):
        ratio = p[idx] / check_zero(q[idx])
        divergence += p[idx] * np.log(check_zero(ratio))
    return divergence
Beispiel #4
0
def AdaBoostSelectTraining(X_trn, y_trn, weight):
    """Build a resampled, shuffled training set, one class at a time.

    For every distinct label in ``y_trn`` the matching samples, labels and
    weights are selected, the weights are divided by check_zero of their
    sum, and the three lists are handed to ``resample`` (defined elsewhere).
    The per-class results are concatenated and then shuffled with a
    RandomState seeded from the current wall-clock time.

    Args:
        X_trn: training samples (array-like, converted to DTY_FLT).
        y_trn: training labels (array-like, converted to DTY_INT).
        weight: one weight per sample (array-like, converted to DTY_FLT).

    Returns:
        Tuple ``(X, y)`` of plain Python lists.
    """
    features = np.array(X_trn, dtype=DTY_FLT)
    labels = np.array(y_trn, dtype=DTY_INT)
    weights = np.array(weight, dtype=DTY_FLT)

    per_class_X = []
    per_class_y = []
    for label in np.unique(labels):
        members = (labels == label)
        # boolean indexing yields a copy, so the in-place division below
        # does not touch `weights`
        class_w = weights[members]
        class_w /= check_zero(np.sum(class_w))
        picked_X, picked_y = resample(
            features[members].tolist(),
            labels[members].tolist(),
            class_w.tolist())
        per_class_X.append(deepcopy(picked_X))
        per_class_y.append(deepcopy(picked_y))

    pooled_X = np.concatenate(per_class_X, axis=0)
    pooled_y = np.concatenate(per_class_y, axis=0)

    # time-derived seed: the ordering differs from call to call
    seed = int(time.time() * GAP_MID % GAP_INF)
    rng = np.random.RandomState(seed)
    order = list(range(len(pooled_y)))
    rng.shuffle(order)
    shuffled_X = pooled_X[order].tolist()
    shuffled_y = pooled_y[order].tolist()

    gc.collect()
    return deepcopy(shuffled_X), deepcopy(shuffled_y)  # list
Beispiel #5
0
def AdaBoostEnsembleAlgorithm(X_trn, y_trn, name_cls, nb_cls):
    """Train ``nb_cls`` base classifiers with AdaBoost-style reweighting.

    In each round a training set is drawn with ``AdaBoostSelectTraining``
    using the current sample weights, a classifier is built with
    ``individual(name_cls, ...)`` and evaluated on the ORIGINAL training
    data. The draw is repeated (at most 21 attempts) until the weighted
    error lies in [0, 0.5); the classifier from the last attempt is kept
    either way.

    Args:
        X_trn: training samples.
        y_trn: training labels; the original comment states Y in {0, 1},
            and the weight update maps them to {-1, +1}.
        name_cls: passed through to ``individual`` to pick the base learner.
        nb_cls: number of base classifiers / boosting rounds.

    Returns:
        Tuple ``(alpha, clfs)``: the coefficients divided by their sum, and
        the list of trained classifiers.
    """
    # Notice alpha here is relevant to this algorithm named AdaBoost.
    clfs = []
    nb_trn = len(y_trn)
    y_arr = np.array(y_trn)

    weight = np.zeros((nb_cls, nb_trn), dtype=DTY_FLT)
    em = [0.0] * nb_cls
    alpha = [0.0] * nb_cls

    weight[0] = np.ones(nb_trn, dtype=DTY_FLT) / nb_trn
    for k in range(nb_cls):
        for _ in range(21):  # same attempt budget as counting 20 down to 0
            # resample data according to the current weights
            wX, wy = AdaBoostSelectTraining(X_trn, y_trn, weight[k].tolist())
            # train a base classifier and run it on ORIGINAL training
            clf = individual(name_cls, wX, wy)
            predicted = clf.predict(X_trn)
            # weighted error rate
            em[k] = np.sum(weight[k] * (predicted != y_arr))
            if 0. <= em[k] < 0.5:
                break

        clfs.append(deepcopy(clf))
        # NOTE(review): alpha uses log base 2 while the weight update below
        # uses the natural exponential; kept as in the original — confirm
        # whether np.log was intended.
        alpha[k] = 0.5 * np.log2(check_zero((1. - em[k]) / check_zero(em[k])))

        if k + 1 < nb_cls:
            # update weights.  y \in {-1,+1} here, transformed from {0,1}
            agreement = (y_arr * 2 - 1) * (predicted * 2 - 1)
            weight[k + 1] = weight[k] * np.exp(-1. * alpha[k] * agreement)
            weight[k + 1] /= check_zero(np.sum(weight[k + 1]))

    # regularization: alpha, sigma(coef)=1.  The sum is guarded with
    # check_zero like every other denominator in this routine.
    am = check_zero(np.sum(alpha))
    alpha = [a / am for a in alpha]

    # The former trailing `del ... zm ...` raised UnboundLocalError when
    # nb_cls == 1, because zm was only bound inside the weight update.
    gc.collect()
    return deepcopy(alpha), deepcopy(clfs)
Beispiel #6
0
def Interrater_agreement_multiclass(yt, y, m, nb_cls):
    """Compute an interrater-agreement score from per-classifier predictions.

    ``yt == y`` is reduced along axis 1 and along axis 0, so ``yt`` must be
    two-dimensional (presumably one row per classifier and one column per
    sample — confirm against callers).

    Args:
        yt: predictions of the individual classifiers.
        y: reference labels.
        m: number of samples.
        nb_cls: number of classifiers.

    Returns:
        ``1 - numerator / check_zero(denominator)`` where the numerator is
        ``sum(rho * (nb_cls - rho)) / nb_cls`` with ``rho`` the axis-0 count
        of matches, and the denominator is
        ``m * (nb_cls - 1) * p_bar * (1 - p_bar)``.
    """
    truth = np.array(y, dtype=DTY_INT)
    votes = np.array(yt, dtype=DTY_INT)
    hits = (votes == truth)

    # overall fraction of matching predictions
    p_bar = np.sum(np.sum(hits, axis=1)) / (float(m) * nb_cls)
    # number of matches per column
    rho_x = np.sum(hits, axis=0)

    spread = np.sum(rho_x * (nb_cls - rho_x)) / float(nb_cls)
    scale = m * (nb_cls - 1.) * p_bar * (1. - p_bar)
    return 1. - spread / check_zero(scale)
def angle(a, b):
    """Return the angle, in radians, between vectors ``a`` and ``b``.

    Args:
        a: first vector (array-like).
        b: second vector (array-like), same length as ``a``.

    Returns:
        ``arccos(a . b / (|a| * |b|))``; the product of the norms is passed
        through check_zero before dividing.
    """
    a = np.array(a)
    b = np.array(b)
    # dot product, scalar product
    prod = np.sum(a * b)  # $a \cdot b$
    # norm / module
    len1 = np.sqrt(np.sum(a * a))  # $|a|$
    len2 = np.sqrt(np.sum(b * b))  # $|b|$
    # $\cos(\theta)$
    cos_theta = prod / check_zero(len1 * len2)
    # Floating-point rounding can leave the ratio a hair outside [-1, 1]
    # for (anti)parallel vectors, which would make arccos return NaN.
    cos_theta = np.clip(cos_theta, -1., 1.)
    theta = np.arccos(cos_theta)
    gc.collect()
    return theta
Beispiel #8
0
 def test_Kappa_Statistic(self):
     """Cross-check the binary and multiclass kappa implementations.

     On the two datasets from negative_generate_simulate the binary kappa
     values must match each other, the multiclass result tuples must match
     element-wise, and the binary value must equal the first element of
     the multiclass result. On data from generate_simulated_data the
     returned kappa must equal (t1 - t2) / check_zero(1 - t2) recomputed
     from the returned thetas.
     """
     nb_inst = 100
     y_a, yt_a, y_b, yt_b = negative_generate_simulate(nb_inst, 2)
     (ha_a, hb_a), (ha_b, hb_b) = yt_a, yt_b

     # binary kappa on both datasets
     bin_a = Kappa_Statistic_binary(ha_a, hb_a, nb_inst)
     bin_b = Kappa_Statistic_binary(ha_b, hb_b, nb_inst)
     self.assertEqual(bin_a, bin_b)

     # multiclass kappa on the same data
     multi_a = Kappa_Statistic_multiclass(ha_a, hb_a, y_a, nb_inst)
     multi_b = Kappa_Statistic_multiclass(ha_b, hb_b, y_b, nb_inst)
     self.assertEqual(all(np.array(multi_a) == np.array(multi_b)), True)
     self.assertEqual(bin_a, multi_a[0])
     self.assertEqual(bin_b, multi_b[0])

     # internal consistency of the returned (kappa, theta1, theta2)
     y_c, yt_c = generate_simulated_data(nb_inst, 7, 2)
     kappa, theta1, theta2 = Kappa_Statistic_multiclass(
         yt_c[0], yt_c[1], y_c, nb_inst)
     self.assertEqual((theta1 - theta2) / check_zero(1. - theta2), kappa)
Beispiel #9
0
def Coincident_Failure_multiclass(yt, y, m, nb_cls):
    """Compute a coincident-failure score from per-classifier predictions.

    ``failing`` is the axis-0 count of entries where ``yt`` differs from
    ``y``; ``pi[i]`` is the fraction (over ``m``) of positions with exactly
    ``i`` such mismatches.

    Args:
        yt: predictions of the individual classifiers.
        y: reference labels.
        m: number of samples.
        nb_cls: number of classifiers.

    Returns:
        ``0.`` when ``pi[0] == 1``; otherwise, when ``pi[0] < 1``,
        ``sum_{i>=1} pi[i] * (nb_cls - i) / (nb_cls - 1)`` divided by
        ``check_zero(1 - pi[0])``. If neither comparison holds the function
        returns ``None``.
    """
    truth = np.array(y, dtype=DTY_INT)
    votes = np.array(yt, dtype=DTY_INT)
    failing = np.sum(votes != truth, axis=0)
    pi = [np.sum(failing == k) / float(m) for k in range(nb_cls + 1)]

    if pi[0] == 1.:
        return 0.
    if not pi[0] < 1.:
        return None

    weighted = 0.
    for k in range(1, nb_cls + 1):
        weighted += pi[k] * (nb_cls - k) / (nb_cls - 1.)
    return weighted / check_zero(1. - pi[0])
Beispiel #10
0
def Generalized_Diversity_multiclass(yt, y, m, nb_cls):
    """Compute a generalized-diversity score from per-classifier predictions.

    For each ``i`` in 1..nb_cls, ``share_i`` is the fraction (over ``m``)
    of positions where exactly ``i`` entries of ``yt`` differ from ``y``
    along axis 0. Two sums are accumulated:
    ``p_1 = sum share_i * i / nb_cls`` and
    ``p_2 = sum share_i * i * (i - 1) / (nb_cls * (nb_cls - 1))``.

    Args:
        yt: predictions of the individual classifiers.
        y: reference labels.
        m: number of samples.
        nb_cls: number of classifiers.

    Returns:
        ``1 - p_2 / check_zero(p_1)``.
    """
    truth = np.array(y, dtype=DTY_INT)
    votes = np.array(yt, dtype=DTY_INT)
    failing = np.sum(votes != truth, axis=0)

    p_1 = 0.
    p_2 = 0.
    # single pass: both accumulators see the terms in the same order as
    # separate loops would
    for k in range(1, nb_cls + 1):
        share = np.sum(failing == k) / float(m)
        p_1 += share * k / nb_cls
        p_2 += share * (k * (k - 1.) / nb_cls / (nb_cls - 1.))

    return 1. - p_2 / check_zero(p_1)
Beispiel #11
0
def Kappa_Statistic_multiclass(hi, hj, y, m):
    """Compute the kappa agreement statistic for two multiclass predictors.

    The table comes from ``multiclass_contingency_table(hi, hj, y)``
    (defined elsewhere); only its leading ``dY x dY`` part is used, where
    ``dY`` is the number of distinct labels in ``y``, ``hi`` and ``hj``.

    Args:
        hi: predictions of the first classifier.
        hj: predictions of the second classifier.
        y: reference labels.
        m: number of samples.

    Returns:
        Tuple ``(kappa, theta1, theta2)`` where ``theta1`` is the diagonal
        mass of the table over ``m``, ``theta2`` is the sum over labels of
        (row total * column total) / m**2, and
        ``kappa = (theta1 - theta2) / check_zero(1 - theta2)``.
    """
    vY = np.unique(np.concatenate([y, hi, hj]))  # L
    dY = len(vY)
    Cij = multiclass_contingency_table(hi, hj, y)
    Cij = np.array(Cij, dtype=DTY_FLT)[:dY, :dY]

    # observed agreement: mass on the diagonal
    c_diagonal = [Cij[i, i] for i in range(dY)]
    theta1 = np.sum(c_diagonal) / float(m)

    # Chance agreement needs the row and column marginals of the table.
    # The earlier code multiplied (C_ii + C_ij) terms together, which equals
    # the marginal only when there are exactly two labels; summing the whole
    # row / column is correct for any number of labels and gives the same
    # value in the two-label case.
    c_row_sum = np.sum(Cij, axis=1)
    c_col_sum = np.sum(Cij, axis=0)
    theta2 = np.sum(np.multiply(c_row_sum, c_col_sum)) / (float(m)**2)

    ans = (theta1 - theta2) / check_zero(1. - theta2)
    return ans, theta1, theta2
Beispiel #12
0
def Kappa_Statistic_binary(hi, hj, m):
    """Compute the kappa statistic from a 2x2 contingency table.

    ``contingency_table(hi, hj)`` (defined elsewhere) supplies the four
    cell counts ``a, b, c, d``.

    Args:
        hi: predictions of the first classifier.
        hj: predictions of the second classifier.
        m: number of samples.

    Returns:
        ``(theta1 - theta2) / check_zero(1 - theta2)`` with
        ``theta1 = (a + d) / m`` and
        ``theta2 = ((a + b)(a + c) + (c + d)(b + d)) / m**2``.
    """
    a, b, c, d = contingency_table(hi, hj)
    total = float(m)
    observed = (a + d) / total
    by_chance = ((a + b) * (a + c) + (c + d) * (b + d)) / (total**2)
    return (observed - by_chance) / check_zero(1. - by_chance)
Beispiel #13
0
def Correlation_Coefficient_binary(hi, hj):
    """Compute a correlation coefficient from a 2x2 contingency table.

    ``contingency_table(hi, hj)`` (defined elsewhere) supplies the four
    cell counts ``a, b, c, d``.

    Returns:
        ``(a*d - b*c) / check_zero(sqrt((a+b)(a+c)(c+d)(b+d)))``.
    """
    a, b, c, d = contingency_table(hi, hj)
    cross_diff = a * d - b * c
    margin_root = np.sqrt((a + b) * (a + c) * (c + d) * (b + d))
    return cross_diff / check_zero(margin_root)
Beispiel #14
0
def Q_Statistic_binary(hi, hj):
    """Compute the Q statistic from a 2x2 contingency table.

    ``contingency_table(hi, hj)`` (defined elsewhere) supplies the four
    cell counts ``a, b, c, d``.

    Returns:
        ``(a*d - b*c) / check_zero(a*d + b*c)``.
    """
    a, b, c, d = contingency_table(hi, hj)
    main_diag = a * d
    anti_diag = b * c
    return (main_diag - anti_diag) / check_zero(main_diag + anti_diag)
Beispiel #15
0
def Entropy_sk_multiclass(yt, y, m, nb_cls):
    """Compute an entropy-style diversity score.

    ``number_individuals_correctly(yt, y)`` (defined elsewhere) supplies
    ``rho_x``; for each of its entries the smaller of ``rho`` and
    ``nb_cls - rho`` is taken.

    Args:
        yt: predictions of the individual classifiers.
        y: reference labels.
        m: number of samples.
        nb_cls: number of classifiers.

    Returns:
        The sum of those minima divided by ``m`` and by
        ``check_zero(nb_cls - ceil(nb_cls / 2))``.
    """
    rho_x = np.array(number_individuals_correctly(yt, y), dtype=DTY_FLT)
    complement = nb_cls - rho_x
    smaller = [min(hit, miss) for hit, miss in zip(rho_x, complement)]
    half_span = nb_cls - np.ceil(nb_cls / 2.)
    return np.sum(smaller) / float(m) / check_zero(half_span)