def corr_iita(dataset, A):
    """Corrected Inductive Item Tree Analysis.

    Performs the corrected inductive item tree analysis procedure and
    returns the corresponding diff values.

    :param dataset: dataframe or matrix consisted of ones and zeros
    :param A: list of competing quasi orders; each quasi order is a
        collection of implication pairs ``(i, j)``
    :return: dictionary with keys ``'diff.value'`` (per-quasi-order diff
        values) and ``'error.rate'`` (per-quasi-order error rates);
        entries for empty quasi orders are ``nan``
    """
    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() was removed in pandas 1.0;
        # to_numpy() is the supported replacement.
        data = dataset.to_numpy()
    b = ob_counter(data)
    if sum(b.sum(axis=0) == 0):
        sys.exit('Each item must be solved at least once')
    n, m = data.shape
    bs = [np.zeros((m, m)) for _ in range(len(A))]
    diff_value_alt = np.repeat(0.0, len(A))
    error = np.repeat(0.0, len(A))

    # computation of error rate
    for k in range(len(A)):
        for i in A[k]:
            error[k] += b[i[0]][i[1]] / data[:, i[1]].sum()
        if not A[k]:
            # error rate is undefined for an empty quasi order;
            # was `= None`, which numpy coerces to nan — made explicit
            error[k] = np.nan
        else:
            error[k] /= len(A[k])

    # computation of diff values over all ordered item pairs (i, j), i != j
    all_imp = {(i, j) for i in range(m) for j in range(m) if i != j}
    for k in range(len(A)):
        if not A[k]:
            diff_value_alt[k] = np.nan
        else:
            for i in all_imp:
                # the three cases are mutually exclusive: pair implied,
                # neither direction implied, only the reverse implied
                if i in A[k]:
                    bs[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum()
                elif (i[1], i[0]) not in A[k]:
                    bs[k][i[0]][i[1]] = (
                        1.0 - data[:, i[0]].sum() / n) * data[:, i[1]].sum()
                else:
                    bs[k][i[0]][i[1]] = (data[:, i[1]].sum()
                                         - data[:, i[0]].sum()
                                         + data[:, i[0]].sum() * error[k])
            diff_value_alt[k] = ((b - bs[k]) ** 2).sum() / (m ** 2 - m)
    return {'diff.value': diff_value_alt, 'error.rate': error}
def iita(dataset, v):
    """Inductive Item Tree Analysis.

    Performs one of the three inductive item tree analysis algorithms
    (minimized corrected, corrected and original).

    :param dataset: dataframe or matrix consisted of ones and zeros
    :param v: algorithm: v=1 (minimized corrected), v=2 (corrected) and
        v=3 (original)
    :return: dictionary with the diff values, the selected implication
        set, its error rate, the selection index and the algorithm used
    """
    if (not isinstance(dataset, (pd.DataFrame, np.ndarray))
            or dataset.shape[1] == 1):
        sys.exit(
            'data must be either a numeric matrix or a dataframe, with at least two columns.'
        )
    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() was removed in pandas 1.0;
        # to_numpy() is the supported replacement.
        data = dataset.to_numpy()
    # (removed stray debug `print(data)` left over from development)
    if np.logical_not(np.logical_or(data == 0, data == 1)).sum() != 0:
        sys.exit('data must contain only 0 and 1')
    if v not in (1, 2, 3):
        sys.exit('IITA version must be specified')
    # inductively generated set of competing quasi orders
    i = ind_gen(ob_counter(data))
    # call chosen algorithm
    if v == 1:
        ii = mini_iita(data, i)
    elif v == 2:
        ii = corr_iita(data, i)
    else:
        ii = orig_iita(data, i)
    # index of the quasi order with the smallest diff value
    index = list(ii['diff.value']).index(min(ii['diff.value']))
    return {
        'diff': ii['diff.value'],
        'implications': i[index],
        'error.rate': ii['error.rate'][index],
        'selection.set.index': index,
        'v': v,
    }
def mini_iita(dataset, A):
    """Minimized Corrected Inductive Item Tree Analysis.

    Performs the minimized corrected inductive item tree analysis
    procedure and returns the corresponding diff values.

    :param dataset: dataframe or matrix consisted of ones and zeros
    :param A: list of competing quasi orders; each quasi order is a
        collection of implication pairs ``(i, j)``
    :return: dictionary with keys ``'diff.value'`` and ``'error.rate'``;
        diff values for empty quasi orders are ``nan``
    """
    data = dataset
    if isinstance(dataset, pd.DataFrame):
        # DataFrame.as_matrix() was removed in pandas 1.0;
        # to_numpy() is the supported replacement.
        data = dataset.to_numpy()
    b = ob_counter(data)
    n, m = data.shape
    bs_num = [np.zeros((m, m)) for _ in range(len(A))]
    # p[i]: number of subjects solving item i
    p = [data[:, i].sum() for i in range(m)]
    diff_value_alt = np.repeat(0.0, len(A))
    error = np.repeat(0.0, len(A))

    # computation of error rate (minimizes the least-squares criterion;
    # x accumulates the coefficients of the quadratic in the error rate)
    for k in range(len(A)):
        x = np.repeat(0.0, 4)
        for i in range(m):
            for j in range(m):
                if i == j:
                    continue
                if (i, j) in A[k]:
                    x[1] += -2 * b[i, j] * p[j]
                    x[3] += 2 * p[j] ** 2
                elif (j, i) in A[k]:
                    x[0] += -2 * b[i, j] * p[i] + 2 * p[i] * p[j] - 2 * p[i] ** 2
                    x[2] += 2 * p[i] ** 2
        error[k] = -(x[0] + x[1]) / (x[2] + x[3])

    # computation of diff values over all ordered item pairs (i, j), i != j
    all_imp = {(i, j) for i in range(m) for j in range(m) if i != j}
    for k in range(len(A)):
        if not A[k]:
            # diff value is undefined for an empty quasi order;
            # was `= None`, which numpy coerces to nan — made explicit
            diff_value_alt[k] = np.nan
        else:
            for i in all_imp:
                # the three cases are mutually exclusive: pair implied,
                # neither direction implied, only the reverse implied
                if i in A[k]:
                    bs_num[k][i[0]][i[1]] = error[k] * data[:, i[1]].sum()
                elif (i[1], i[0]) not in A[k]:
                    bs_num[k][i[0]][i[1]] = (
                        1.0 - data[:, i[0]].sum() / float(n)) * data[:, i[1]].sum()
                else:
                    bs_num[k][i[0]][i[1]] = (data[:, i[1]].sum()
                                             - data[:, i[0]].sum()
                                             + data[:, i[0]].sum() * error[k])
            diff_value_alt[k] = ((b - bs_num[k]) ** 2).sum() / (m ** 2 - m)
    return {'diff.value': diff_value_alt, 'error.rate': error}
# Response patterns: one row per subject, one column per item (items 1..10).
data = np.mat([
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 1
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],  # 2
    [0, 1, 1, 0, 0, 0, 0, 0, 0, 0],  # 3
    [0, 1, 1, 0, 1, 0, 0, 0, 0, 0],  # 4
    [0, 1, 1, 0, 1, 0, 1, 0, 0, 0],  # 5
    [1, 0, 1, 1, 0, 1, 0, 0, 0, 0],  # 6
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 7
    [0, 1, 1, 1, 1, 0, 1, 0, 1, 0],  # 8
    [0, 1, 0, 1, 0, 1, 0, 1, 1, 0],  # 9
    [1, 1, 1, 1, 1, 1, 0, 1, 1, 1],  # 10
])
# Competing quasi orders induced from the observed counterexample counts.
A = kst.ind_gen(kst.ob_counter(data))
b = kst.ob_counter(data)
n, m = data.shape
# One (m x m) expected-counterexample matrix per candidate quasi order.
bs_num = [np.zeros((m, m)) for _ in range(len(A))]
# Column sums: number of subjects solving each item.
p = [data[:, col].sum() for col in range(m)]
diff_value_alt = np.repeat(0.0, len(A))
error = np.repeat(0.0, len(A))
# computation of error rate