Esempio n. 1
0
def corr_iita(dataset, A):
    """
    Corrected Inductive Item Tree Analysis
    Performs the corrected inductive item tree analysis procedure and returns the corresponding diff values.

    :param dataset: dataframe or matrix consisting of ones and zeros
    :param A: list of competing quasi orders, each a collection of (i, j) item-index pairs
    :return: dictionary with the keys 'diff.value' and 'error.rate'; both are float arrays
        holding one entry per quasi order (NaN for an empty quasi order)
    :raises SystemExit: if the observed counter matrix has an all-zero column
    """

    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values is
        # available on every pandas version.
        data = dataset.values

    # Plain ndarray so that `** 2` below is element-wise even if the helper
    # hands back an np.matrix.
    b = np.asarray(ob_counter(data))
    if np.any(b.sum(axis=0) == 0):
        sys.exit('Each item must be solved at least once')

    n, m = data.shape

    # Column totals, computed once.  Kept as floats so every division below is
    # a true division on Python 2 as well.
    p = np.asarray(data).sum(axis=0).astype(float)

    diff_value_alt = np.zeros(len(A))
    error = np.zeros(len(A))

    # computation of error rate: mean of b[i, j] / p[j] over the pairs of each
    # quasi order
    for k, order in enumerate(A):
        if not order:
            error[k] = np.nan
            continue
        for i, j in order:
            error[k] += b[i, j] / p[j]
        error[k] /= len(order)

    # computation of diff values
    for k, order in enumerate(A):
        if not order:
            diff_value_alt[k] = np.nan
            continue

        # Set membership instead of scanning the quasi order for every pair.
        members = {tuple(pair) for pair in order}
        expected = np.zeros((m, m))
        for i in range(m):
            for j in range(m):
                if i == j:
                    continue
                if (i, j) in members:
                    expected[i, j] = error[k] * p[j]
                elif (j, i) in members:
                    # only the reversed pair belongs to the quasi order
                    expected[i, j] = p[j] - p[i] + p[i] * error[k]
                else:
                    # neither direction belongs to the quasi order
                    expected[i, j] = (1.0 - p[i] / n) * p[j]

        # mean squared deviation over the m * (m - 1) off-diagonal cells
        diff_value_alt[k] = ((b - expected) ** 2).sum() / (m ** 2 - m)

    return {'diff.value': diff_value_alt, 'error.rate': error}
Esempio n. 2
0
def iita(dataset, v):
    """
    Inductive Item Tree Analysis
    Performs one of the three inductive item tree analysis algorithms (minimized corrected, corrected and original).

    :param dataset: dataframe or matrix consisting of ones and zeros
    :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and v=3 (original)
    :return: dictionary with the keys 'diff' (all diff values), 'implications' (the selected
        quasi order), 'error.rate' (its error rate), 'selection.set.index' (its position in
        the generated selection set) and 'v'
    :raises SystemExit: if the data is not a two-dimensional dataframe/array with at least
        two columns, contains values other than 0 and 1, or v is not 1, 2 or 3
    :raises ValueError: if no diff value can be selected (none computed, or all are NaN)
    """

    # ndim is checked before shape[1] so that a 1-D array gets the intended
    # message instead of an IndexError; "< 2" also rejects zero columns.
    is_tabular = isinstance(dataset, (pd.DataFrame, np.ndarray))
    if not is_tabular or dataset.ndim != 2 or dataset.shape[1] < 2:
        sys.exit(
            'data must be either a numeric matrix or a dataframe, with at least two columns.'
        )

    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values is
        # available on every pandas version.
        data = dataset.values

    if not np.logical_or(data == 0, data == 1).all():
        sys.exit('data must contain only 0 and 1')

    if v not in (1, 2, 3):
        sys.exit('IITA version must be specified')

    # inductively generated set of competing quasi orders
    i = ind_gen(ob_counter(data))

    # call chosen algorithm
    if v == 1:
        ii = mini_iita(data, i)
    elif v == 2:
        ii = corr_iita(data, i)
    else:
        ii = orig_iita(data, i)

    # Smallest diff value wins (first one on ties).  NaN entries are skipped:
    # min() followed by list.index() fails when the first entry is NaN.
    index = int(np.nanargmin(np.asarray(ii['diff.value'], dtype=float)))
    return {
        'diff': ii['diff.value'],
        'implications': i[index],
        'error.rate': ii['error.rate'][index],
        'selection.set.index': index,
        'v': v
    }
Esempio n. 3
0
def mini_iita(dataset, A):
    """
    Minimized Corrected Inductive Item Tree Analysis
    Performs the minimized corrected inductive item tree analysis procedure and returns the corresponding diff values.

    :param dataset: dataframe or matrix consisting of ones and zeros
    :param A: list of competing quasi orders, each a collection of (i, j) item-index pairs
    :return: dictionary with the keys 'diff.value' and 'error.rate'; both are float arrays
        holding one entry per quasi order (NaN for an empty quasi order)
    """

    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values is
        # available on every pandas version.
        data = dataset.values

    # Plain ndarray so that `** 2` below is element-wise even if the helper
    # hands back an np.matrix.
    b = np.asarray(ob_counter(data))
    n, m = data.shape

    # Column totals, computed once and reused by both passes below.
    p = np.asarray(data).sum(axis=0).astype(float)

    diff_value_alt = np.zeros(len(A))
    error = np.zeros(len(A))

    # One membership set per quasi order instead of scanning it for every pair.
    member_sets = [{tuple(pair) for pair in order} for order in A]

    # computation of error rate: error = -(x0 + x1) / (x2 + x3)
    for k, members in enumerate(member_sets):
        x = np.zeros(4)
        for i in range(m):
            for j in range(m):
                if i == j:
                    continue
                if (i, j) in members:
                    x[1] += -2 * b[i, j] * p[j]
                    x[3] += 2 * p[j] ** 2
                elif (j, i) in members:
                    # only the reversed pair belongs to the quasi order
                    x[0] += -2 * b[i, j] * p[i] + 2 * p[i] * p[j] - 2 * p[i] ** 2
                    x[2] += 2 * p[i] ** 2

        denominator = x[2] + x[3]
        # A zero denominator only occurs together with a zero numerator (no
        # contributing pairs, or all involved column totals are zero); spell
        # out the NaN instead of dividing 0 by 0.
        error[k] = -(x[0] + x[1]) / denominator if denominator else np.nan

    # computation of diff values
    for k, members in enumerate(member_sets):
        if not A[k]:
            diff_value_alt[k] = np.nan
            continue

        expected = np.zeros((m, m))
        for i in range(m):
            for j in range(m):
                if i == j:
                    continue
                if (i, j) in members:
                    expected[i, j] = error[k] * p[j]
                elif (j, i) in members:
                    expected[i, j] = p[j] - p[i] + p[i] * error[k]
                else:
                    # neither direction belongs to the quasi order
                    expected[i, j] = (1.0 - p[i] / n) * p[j]

        # mean squared deviation over the m * (m - 1) off-diagonal cells
        diff_value_alt[k] = ((b - expected) ** 2).sum() / (m ** 2 - m)

    return {'diff.value': diff_value_alt, 'error.rate': error}
Esempio n. 4
0
# Hand-written 10 x 10 binary matrix used as input for the steps below.
# The header and the trailing comments only number the columns and rows.
#       1  2  3  4  5  6  7  8  9  10
data = [
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  #1
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],  #2
    [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],  #3
    [0, 1, 1, 0, 1, 0, 0, 0, 0, 0],  #4
    [0, 1, 1, 0, 1, 0, 1, 0, 0, 0],  #5
    [1, 0, 1, 1, 0, 1, 0, 0, 0, 0],  #6
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  #7
    [0, 1, 1, 1, 1, 0, 1, 0, 1, 0],  #8
    [0, 1, 0, 1, 0, 1, 0, 1, 1, 0],  #9
    [1, 1, 1, 1, 1, 1, 0, 1, 1, 1],  #10
]
# np.mat was removed in NumPy 2.0; np.asmatrix is the same function under the
# name that still exists, so `data` remains an np.matrix.
data = np.asmatrix(data)

# Selection set of quasi orders generated from the counter matrix, and the
# counter matrix itself.
A = kst.ind_gen(kst.ob_counter(data))
b = kst.ob_counter(data)
n, m = data.shape

# one m x m zero matrix per quasi order
bs_num = []
for i in range(len(A)):
    bs_num.append(np.zeros((m, m)))

# column totals of the data matrix
p = []
for i in range(m):
    p.append(data[:, i].sum())

# per-quasi-order result vectors, zero-initialised
diff_value_alt = np.zeros(len(A))
error = np.zeros(len(A))
# computation of error rate