def test_from_list(self):
    """Run ``irt`` on a list of ``(uid, qid, ans)`` integer tuples read from ``self.data_src``.

    The file is parsed as comma-separated lines; whitespace-only lines are
    skipped. No assertions are made on the result: the test passes as long
    as ``irt`` returns a two-element result without raising.
    """
    records = []
    with open(self.data_src, 'r') as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped:
                uid, qid, ans = stripped.split(',')
                records.append((int(uid), int(qid), int(ans)))
    item_param, user_param = irt(records, max_iter=2)
def test_2pl_solver(self):
    """Fit ``self.data`` with ``irt`` and compare item estimates to ``alpha``/``beta``.

    For each of the first ``T`` entries of ``item_ids`` the estimated
    discrimination and difficulty are compared against the module-level
    ``alpha[t]`` and ``beta[t]`` values with fixed absolute tolerances.
    """
    item_param, user_param = irt(
        self.data,
        theta_bnds=[-theta_range/2, theta_range/2],
        num_theta=11,
        alpha_bnds=[0.25, 3],
        beta_bnds=[-3, 3],
        tol=1e-5,
        max_iter=30,
    )
    for t in range(T):
        item_id = item_ids[t]
        estimate = item_param[item_id]
        print(item_id, estimate)
        alpha_gap = abs(estimate['alpha'] - alpha[t])
        beta_gap = abs(estimate['beta'] - beta[t])
        # Item 'h' is exempt from the discrimination check.
        if item_id != 'h':
            self.assertTrue(alpha_gap < 0.37)
        # NOTE(review): the flattened source does not show whether this
        # difficulty check was nested under the guard above; it is kept
        # unconditional here -- confirm against the original layout.
        self.assertTrue(beta_gap < 0.16)
def test_2pl_solver_parallel(self):
    """Same comparison as the serial 2PL test, with ``is_parallel=True``.

    ``irt`` is called on ``self.data`` with ``check_interval=0.1``; the
    estimates for items ``q0`` .. ``q{T-1}`` are compared against the
    module-level ``alpha[t]`` and ``beta[t]`` values.
    """
    item_param, user_param = irt(
        self.data,
        theta_bnds=[-theta_range/2, theta_range/2],
        num_theta=11,
        alpha_bnds=[0.25, 3],
        beta_bnds=[-3, 3],
        tol=1e-5,
        max_iter=30,
        is_parallel=True,
        check_interval=0.1,
    )
    for t in range(T):
        item_id = 'q%d' % t
        estimate = item_param[item_id]
        print(item_id, estimate)
        alpha_gap = abs(estimate['alpha'] - alpha[t])
        beta_gap = abs(estimate['beta'] - beta[t])
        # Item 'q6' is exempt from the discrimination check.
        if item_id != 'q6':
            self.assertTrue(alpha_gap < 0.37)
        # NOTE(review): the flattened source does not show whether this
        # difficulty check was nested under the guard above; it is kept
        # unconditional here -- confirm against the original layout.
        self.assertTrue(beta_gap < 0.16)
def irt_estimation(
        df_gte: pd.DataFrame,
        difficulty_range=DIFFICULTY_RANGE,
        discrimination_range=(DEFAULT_DISCRIMINATION, DEFAULT_DISCRIMINATION),
        guess=DEFAULT_GUESS
) -> dict:
    """
    Perform the IRT estimation of the items from the interactions stored in
    ``df_gte``. If necessary, artificial answers are added first, since pyirt
    requires every question to have at least one wrong and one correct answer.

    :param df_gte: interactions dataframe; the ``ANSWERS_DF_COLS`` columns are
        selected from it, and ``S_ID``, ``Q_ID`` and ``CORRECT`` are read.
    :param difficulty_range: bounds handed to ``irt`` as both ``theta_bnds``
        and ``beta_bnds``.
    :param discrimination_range: bounds handed to ``irt`` as ``alpha_bnds``.
    :param guess: value stored for every question in ``in_guess_param``.
    :return: a single dict with two entries, ``DIFFICULTY`` and
        ``DISCRIMINATION``, each mapping a question id to its estimate. The
        difficulty is the negated ``beta`` reported by ``irt``.
    :raises ValueError: if the call to ``irt`` raises any exception; the
        original exception is attached as the cause.
    """
    # Count, per question, how many distinct correctness values were observed.
    # A count below 2 means the question has only correct or only wrong answers.
    q_cnt_per_correctness = (
        df_gte.groupby([Q_ID, CORRECT]).size().reset_index()
        .groupby(Q_ID).size().reset_index()
    )
    questions_to_add = list(q_cnt_per_correctness[q_cnt_per_correctness[0] < 2][Q_ID])
    num_q_to_add = len(questions_to_add)
    print('[INFO] %d questions to fill in out of %d' % (num_q_to_add, len(df_gte[Q_ID].unique())))

    # Add a perfectly good and a perfectly bad student for each such question,
    # so that both outcomes are present and pyirt can work.
    questions_to_add_df = pd.DataFrame(
        {
            S_ID: ['P_GOOD'] * num_q_to_add + ['P_BAD'] * num_q_to_add,
            TIMESTAMP: [None] * 2 * num_q_to_add,
            CORRECT: [True] * num_q_to_add + [False] * num_q_to_add,
            Q_ID: questions_to_add + questions_to_add,
        }
    )
    df = pd.concat([df_gte[ANSWERS_DF_COLS], questions_to_add_df[ANSWERS_DF_COLS]], ignore_index=True)

    interactions_list = [
        (user, item, correctness)
        for user, item, correctness in df[[S_ID, Q_ID, CORRECT]].values
    ]
    try:
        item_params, user_params = irt(
            interactions_list,
            theta_bnds=difficulty_range,
            beta_bnds=difficulty_range,
            alpha_bnds=discrimination_range,
            in_guess_param={q: guess for q in df[Q_ID].unique()},
            max_iter=100
        )
    except Exception as err:
        # Chain explicitly so the underlying pyirt failure stays visible.
        raise ValueError(
            "Problem in IRTCalibrator. Check if there are items with only correct/wrong answers."
        ) from err

    question_dict = dict()
    question_dict[DIFFICULTY] = dict()
    question_dict[DISCRIMINATION] = dict()
    for question, question_params in item_params.items():
        # The sign of beta is flipped to obtain the stored difficulty.
        question_dict[DIFFICULTY][question] = -question_params['beta']
        question_dict[DISCRIMINATION][question] = question_params["alpha"]
    return question_dict
def test_3pl_solver(self):
    """Fit ``self.data`` with guess parameters and compare item estimates to ``alpha``/``beta``.

    ``irt`` receives the module-level ``guess_param`` as ``in_guess_param``.
    Items ``q6`` and ``q7`` are exempt from the discrimination check and
    item ``q8`` from the difficulty check.
    """
    item_param, user_param = irt(
        self.data,
        theta_bnds=[-theta_range/2, theta_range/2],
        num_theta=11,
        alpha_bnds=[0.25, 3],
        beta_bnds=[-3, 3],
        in_guess_param=guess_param,
        tol=1e-5,
        max_iter=30,
    )
    for t in range(T):
        item_id = 'q%d' % t
        estimate = item_param[item_id]
        print(item_id, estimate)
        alpha_gap = abs(estimate['alpha'] - alpha[t])
        beta_gap = abs(estimate['beta'] - beta[t])
        if item_id not in ('q6', 'q7'):
            self.assertTrue(alpha_gap < 0.25)
        # NOTE(review): the two guards are treated as siblings; the flattened
        # source does not show their nesting -- confirm against the original.
        if item_id != 'q8':
            self.assertTrue(beta_gap < 0.15)
def irt_estimation(interactions_df: pd.DataFrame,
                   difficulty_range=(DIFFICULTY_MIN, DIFFICULTY_MAX),
                   discrimination_range=(DEFAULT_DISCRIMINATION, DEFAULT_DISCRIMINATION),
                   guess=DEFAULT_GUESS) -> (dict, dict):
    """
    Estimate the latent traits of students and questions with pyirt's ``irt``.

    The (user, question, correctness) triples are taken from
    ``interactions_df``. Every question that shows a single correctness value
    gets one extra correct answer from ``'p_good'`` and one extra wrong answer
    from ``'p_bad'`` before the estimation runs.

    Returns ``(user_dict, question_dict)``: ``user_dict`` is a copy of the
    user parameters returned by ``irt``; ``question_dict`` has the entries
    ``DIFFICULTY_KEY`` (negated ``beta``) and ``DISCRIMINATION_KEY``
    (``alpha``), each keyed by question. Any exception raised by ``irt`` is
    replaced by a ``ValueError``.
    """
    triple_columns = [USER_ID_HEADER, QUESTION_ID_HEADER, CORRECT_HEADER]
    interactions_list = [tuple(row) for row in interactions_df[triple_columns].values]

    # pyirt crashes on items with only correct or only wrong answers, so find
    # the questions for which a single correctness value was observed.
    per_outcome = interactions_df.groupby([QUESTION_ID_HEADER, CORRECT_HEADER]).size().reset_index()
    outcomes_per_question = per_outcome.groupby(QUESTION_ID_HEADER).size().reset_index()
    outcomes_per_question = outcomes_per_question.rename(columns={0: 'cnt'})
    single_outcome = outcomes_per_question['cnt'] == 1
    list_q_to_add = list(outcomes_per_question[single_outcome][QUESTION_ID_HEADER])
    print('[INFO] %d questions filled in' % len(list_q_to_add))

    # All artificial correct answers first, then all artificial wrong ones.
    interactions_list += [('p_good', question, True) for question in list_q_to_add]
    interactions_list += [('p_bad', question, False) for question in list_q_to_add]

    guess_by_question = dict.fromkeys(interactions_df[QUESTION_ID_HEADER].unique(), guess)
    try:
        item_params, user_params = irt(
            interactions_list,
            theta_bnds=difficulty_range,
            beta_bnds=difficulty_range,
            alpha_bnds=discrimination_range,
            in_guess_param=guess_by_question,
            max_iter=100,
        )
    except Exception:
        raise ValueError(
            "Problem in irt_estimation. Check if there are items with only correct/wrong answers."
        )

    question_dict = {
        # The sign of beta is flipped to obtain the stored difficulty.
        DIFFICULTY_KEY: {q: -params['beta'] for q, params in item_params.items()},
        DISCRIMINATION_KEY: {q: params["alpha"] for q, params in item_params.items()},
    }
    user_dict = dict(user_params.items())
    return user_dict, question_dict
def test_from_file(self):
    """Run ``irt`` directly on an open file handle for ``self.data_src``.

    No assertions are made on the result: the test passes as long as
    ``irt`` returns a two-element result without raising.
    """
    # Use a context manager so the handle is closed even if ``irt`` raises.
    with open(self.data_src, 'r') as src_fp:
        item_param, user_param = irt(src_fp, max_iter=2)
from pyirt import irt

# Usage example: three ways of calling ``irt`` on the same input file.
# Alternatively, pass in a list of tuples in the format
# [(user_id, item_id, ans_boolean)], where ans_boolean is 0/1.
# The context manager guarantees the file is closed when the example ends.
with open(file_path, 'r') as src_fp:
    # (1) Run by default
    item_param, user_param = irt(src_fp)

    # (2) Supply bounds
    # Rewind first so a handle already read by the previous call starts from the top.
    src_fp.seek(0)
    item_param, user_param = irt(src_fp,
                                 theta_bnds=[-4, 4],
                                 alpha_bnds=[0.1, 3],
                                 beta_bnds=[-3, 3])

    # (3) Supply guess parameter
    guessParamDict = {1: {'c': 0.0}, 2: {'c': 0.25}}
    src_fp.seek(0)
    item_param, user_param = irt(src_fp, in_guess_param=guessParamDict)
from pyirt import irt

if __name__ == "__main__":
    # Load tab-separated (user id, item id, answer) integer triples.
    src_data = []
    with open('data/big_data.txt', 'r') as src_handle:
        for line in src_handle:
            # Lines read from a file keep their newline, so a blank line is
            # never equal to ''; test the stripped text instead.
            record = line.strip()
            if not record:
                continue
            uidstr, eidstr, atagstr = record.split('\t')
            src_data.append((int(uidstr), int(eidstr), int(atagstr)))

    # Only the first 1,000,000 records are passed to the estimator.
    item_param, user_param = irt(src_data[0:1000000])