def score_mc(d_ak, **kwargs):
    """Score multiple-choice data against an answer key.

    "d_ak" is a dict with a 'data' Damon object and an 'anskey' Damon
    object whose 'Correct' core column holds the keyed answers.  Extra
    keyword arguments are forwarded to Damon's score_mc().  Returns the
    scored data in 'whole' array format.
    """
    damon_obj = d_ak['data']
    anskey_obj = d_ak['anskey']

    # Pair each item id with its keyed correct response.
    item_ids = tools.getkeys(anskey_obj, 'Row', 'Core')
    answers = anskey_obj.core_col['Correct']
    key_spec = ['Cols', dict(zip(item_ids, answers))]

    damon_obj.score_mc(key_spec, **kwargs)
    scored = core.Damon(damon_obj.score_mc_out, 'datadict', 'whole',
                        verbose=None)
    return scored.whole
def get_next_item(self, resps):
    """Get next item(s) to deliver to student.

    "resps" is a JSON string parseable by pandas whose first column
    label is the student id.  Accumulates the student's responses in
    self.persons, builds a one-row Damon object, scores / standardizes
    it when the bank supports it, then runs the configured engine.

    NOTE(review): the trailing prints and sys.exit() look like debug
    scaffolding left in place — confirm before shipping.
    """
    resps_ = pd.read_json(resps)
    stud = resps_.columns.values[0]

    if stud in self.persons:
        df = self.persons[stud]
        # Bump the iteration counter relative to the last stored row.
        resps_['iter'] = df.iloc[-1, 1] + 1
        # DataFrame.append() was removed in pandas 2.0; pd.concat is
        # the supported equivalent.
        df = pd.concat([df, resps_])
    else:
        df = resps_
        df['iter'] = 0
    self.persons[stud] = df

    # One row per student, items as columns.
    data = df.loc[:, stud].to_frame(stud).transpose()
    d = core.Damon(data, 'dataframe', 'RCD_dicts_whole',
                   validchars=self.validchars, verbose=None)

    # Score multiple choice if available (best-effort: the bank may
    # not contain an answer key).
    try:
        d.score_mc(anskey=self.bank)
    except damon1.utils.score_mc_Error:
        pass

    # Standardize if available (best-effort, same rationale).
    try:
        d.standardize(std_params=self.bank)
    except damon1.utils.standardize_Error:
        pass

    print('\ndf=\n', df)
    print('\nd=\n', d)

    # self.engine is [callable, kwargs]; dispatch by method name.
    eng = self.engine
    getattr(d, eng[0].__name__)(**eng[1])
    print('d.rasch_out=\n', d.rasch_out)
    sys.exit()
def coord(data, **kwargs):
    """Run Damon's coord() and return the facet-0 coordinates array.

    "data" is a Damon object; keyword arguments are forwarded to
    coord().  When kwargs['anchors'] is not None, a fresh item bank is
    built first: any stale bank file is removed, coord() runs once
    unanchored, and the result is banked before the anchored run.
    """
    d = data
    bankfile = TEMP_PATH + 'ibank.pkl'

    if kwargs['anchors'] is not None:
        try:
            os.remove(bankfile)
        except OSError:
            # Narrowed from a bare except: only a missing/unremovable
            # stale bank file should be ignored here.
            pass
        kwargs_ = kwargs.copy()
        kwargs_['anchors'] = None
        d.coord(**kwargs_)
        d.bank(bankfile)

    d.coord(**kwargs)
    d = core.Damon(d.coord_out['fac0coord'], 'datadict', 'RCD',
                   verbose=None)
    return d.coredata
def rasch(data, **kwargs):
    """Run Damon's rasch() and return the estimates as a whole array.

    "data" is a Damon object; keyword arguments are forwarded to
    rasch().  When kwargs['anchors'] is not None, an item bank is
    built first (unanchored run + bank()) before the anchored run.
    """
    d = data
    bankfile = TEMP_PATH + 'ibank.pkl'

    # Build bank first
    if kwargs['anchors'] is not None:
        try:
            os.remove(bankfile)
        except OSError:
            # Narrowed from a bare except: only a missing/unremovable
            # stale bank file should be ignored here.
            pass
        kwargs_ = kwargs.copy()
        kwargs_['anchors'] = None
        d.rasch(**kwargs_)
        d.bank(bankfile)

    d.rasch(**kwargs)
    d = core.Damon(d.rasch_out['estimates'], 'datadict', 'whole',
                   verbose=None)
    return d.whole
def extract_valid(data, **kwargs):
    """Apply Damon's extract_valid() and return the result as a
    'whole' array.  Keyword arguments are forwarded unchanged.
    """
    obj = data
    obj.extract_valid(**kwargs)
    result = core.Damon(obj.extract_valid_out, 'datadict', 'whole',
                        verbose=None)
    return result.whole
def merge_info(data_info, target_axis, get_validchars):
    """Merge label info into a Damon object and return a 'whole' array.

    "data_info" is a dict carrying the 'data' Damon object and the
    info object under the 'anskey' key; "target_axis" and
    "get_validchars" are passed straight through to merge_info().
    """
    obj = data_info['data']
    extra = data_info['anskey']
    obj.merge_info(extra, target_axis, get_validchars)
    merged = core.Damon(obj.merge_info_out, 'datadict', 'whole',
                        verbose=None)
    return merged.whole
def read_winsteps(data):
    """Convert Winsteps control file in Damon object

    Returns
    -------
        {'data':Damon object,
         'anskey':answer key
         }

    Comments
    --------
        This function was a quick and dirty effort to read a
        Winsteps control file for a particular case.  It probably
        won't work on your files without some editing.  Save a
        copy and edit it to fit your situation.

    Arguments
    ---------
        "data" is a path name to a Winsteps control file that
        contains both specifications and data.
    """
    clean_lines = []

    # Get clean list of lines, capturing some variables.
    # Opened in text mode: the original 'rb' mode yields bytes in
    # Python 3, which breaks the str-argument replace()/parsing below.
    with open(data, 'r') as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        line = line.replace('"', "").strip()
        clean_lines.append(line)

        # Winsteps spec lines like "Item1 = 12" give 0-based offsets.
        if 'Item1' in line:
            start_resp = int(line[line.find('Item1') + 6:]) - 1
        if 'Name1' in line:
            start_name = int(line[line.find('Name1') + 6:]) - 1
        if 'Codes' in line:
            validchars_ = line[line.find('Codes') + 6:]
        if 'Key' in line:
            key = line[line.find('Key') + 4:]
        if '&END' in line:
            start_items = i + 1
        if 'END NAMES' in line:
            stop_items = i
            start_data = i + 1

    # Get variables
    items = clean_lines[start_items:stop_items]
    validchars = ['All', list(validchars_)]
    anskey = dict(zip(items, list(key)))
    data_lines = clean_lines[start_data:]
    persons = []
    person_resps = []
    nitems = len(items)

    # Read the data file, parse out persons
    for line in data_lines:
        x = line[start_name:start_resp].strip()
        person = x.replace(' ', '')  # Remove gaps in person ids (temp)
        persons.append(person)
        resps = list(line[start_resp:start_resp + nitems])
        person_resps.append(resps)

    # Convert into arrays, prepending the 'id' header cell.
    persons.insert(0, 'id')
    items.insert(0, 'id')
    rowlabels = np.array(persons)[:, np.newaxis]
    collabels = np.array(items)[np.newaxis, :]
    coredata = np.array(person_resps)

    # Build datadict for Damon
    datadict = {'rowlabels': rowlabels,
                'collabels': collabels,
                'coredata': coredata,
                'nheaders4rows': 1,
                'key4rows': 0,
                'rowkeytype': 'S60',
                'nheaders4cols': 1,
                'key4cols': 0,
                'colkeytype': 'S60',
                'validchars': validchars,
                'nanval': '-999',
                }

    d = dmn.Damon(datadict, 'datadict', verbose=True)

    return {'data': d, 'anskey': anskey}
# Load the dichotomous (0/1) example dataset into a Damon object.
# Bracketed notes are the standard Damon parameter reference comments.
data = dmn.Damon(data='a_data_rasch_0_example.csv',  # [<array, file, [file list], datadict, Damon object, hd5 file> => data in format specified by format_=]
                 format_='textfile',  # [<'textfile', ['textfiles'],'array','datadict','datadict_link','datadict_whole','Damon','hd5','pickle'>]
                 workformat='RCD_dicts_whole',  # [<'RCD','whole','RCD_whole','RCD_dicts','RCD_dicts_whole'>]
                 validchars=['All', [0, 1], 'Num'],  # [<None,['All',[valid chars],<'Num','Guess','SkipCheck',omitted>],['Cols',{...}]>]
                 nheaders4rows=1,  # [number of columns to hold row labels]
                 key4rows=0,  # [<None, nth column from left which holds row keys>]
                 rowkeytype=int,  # [<None, type of row keys>]
                 nheaders4cols=1,  # [number of rows to hold column labels]
                 key4cols=0,  # [<None, nth row from top which holds column keys>]
                 colkeytype=int,  # [<None, type of column keys>]
                 check_dups='warn',  # [<None,'warn','stop'> => response to duplicate row/col keys]
                 dtype=[object, 3],  # [[type of 'whole' matrix, <None, int number of decimals>], e.g. ['S60',8],[object,None]]
                 nanval=-999,  # [Value to which non-numeric/invalid characters should be converted.]
                 missingchars=None,  # [<None, [list of elements to make missing]>]
                 miss4headers=None,  # [<None, [list of elements to make missing in headers]>]
                 recode=None,  # [<None,{0:[[slice(StartRow,EndRow),slice(StartCol,EndCol)],{RecodeFrom:RecodeTo,...}],...}>]
                 cols2left=None,  # [<None, [ordered list of col keys, to shift to left and use as rowlabels]>]
                 selectrange=None,  # [<None,[slice(StartRow,EndRow),slice(StartCol,EndCol)]>]
                 delimiter=',',  # [<None, character to delimit input file columns (e.g. ',' for .csv)>]
                 pytables=None,  # [<None,'filename.hd5'> => Name of .hd5 file to hold Damon outputs]
                 verbose=True,  # [<None, True> => report method calls]
                 )
# looks to the validchars parameter to figure out the rating scale. vc = {} for item in range(1, ncols + 1): if item <= ncols / 2.0: vc[item] = [0, 1] else: vc[item] = [0, 1, 2] validchars = ['Cols', vc, 'Num'] # Load dataset using Damon. The missing parameters go to their defaults. data = dmn.Damon(data = 'a_data_rasch_1_example.csv', # [<array, file, [file list], datadict, Damon object, hd5 file> => data in format specified by format_=] format_ = 'textfile', # [<'textfile', ['textfiles'],'array','datadict','datadict_link','datadict_whole','Damon','hd5','pickle'>] workformat = 'RCD_dicts_whole', # [<'RCD','whole','RCD_whole','RCD_dicts','RCD_dicts_whole'>] validchars = validchars, # [<None,['All',[valid chars],<'Num','Guess','SkipCheck',omitted>],['Cols',{'ID1':['a','b'],'ID2':['All'],'ID3':['1.2 -- 3.5'],'ID4':['0 -- '],...}]>] nheaders4rows = 1, # [number of columns to hold row labels] key4rows = 0, # [<None, nth column from left which holds row keys>] rowkeytype = int, # [<None, type of row keys>] nheaders4cols = 2, # [number of rows to hold column labels] key4cols = 0, # [<None, nth row from top which holds column keys>] colkeytype = int ) # Analyze with Rasch model. Note the groups parameter. data.rasch(groups = {'row':1}, # [<None, {'row':int row of group labels}, ['key', {'group0':['i1', i2'],...}], ['index', {'group0':[0, 1],...}]> => identify groups] anchors = None, # [<None, {'Bank':<pickle file>, 'row_ents':[<None,'All',row entity list>], 'col_ents':[<None,'All',col entity list>]}> ] runspecs = [0.0001,20], # [<[stop_when_change, max_iteration]> => iteration stopping conditions ] minvar = 0.001, # [<decimal> => minimum row/col variance allowed during iteration] maxchange = 10, # [<+num> => maximum change allowed per iteration] )
def build_strat_table(loaded, item, raw_score='RawScore', group='Sex',
                      strata='all_scores'):
    """Build ability stratified table of score counts by group.

    "loaded" is a Damon object of scored data; "item" is the item
    column to tabulate; "raw_score" is the raw-score column label;
    "group" is the row-label column holding group membership;
    "strata" is 'all_scores' (one stratum per observed raw score) or
    an int number of raw-score strata.  Returns a Damon object of
    counts (rows = stratum x group, cols = rating categories).
    Raises dif_stats_Error when any stratum has <= 1 observations.
    """
    # Get scores, raw scores, groups
    d = loaded
    scores = d.core_col[item]
    sum_scores = d.core_col[raw_score]
    groups = d.rl_col[group]
    unique_raws = np.unique(sum_scores[sum_scores != d.nanval])

    # Associate raw scores with strata
    if strata == 'all_scores':
        interval = 1
        strata = np.arange(len(unique_raws))
    else:
        interval = int(len(unique_raws) / float(strata))
        strata = np.arange(strata)

    # Strata bins are of equal length, except the bottom bin which
    # captures the remainder (svals is right-aligned into strat_vals).
    strat_vals = np.zeros(np.shape(unique_raws))
    svals = np.repeat(strata, interval)
    strat_vals[-len(svals):] = svals
    strat_lookup = dict(zip(unique_raws, strat_vals))

    # Get stratum for each person
    person_strat = np.zeros(np.shape(sum_scores))
    for score in unique_raws:
        person_strat[sum_scores == score] = strat_lookup[score]

    # Get rating categories
    cats = np.unique(scores[scores != d.nanval])

    # Build stratum and group arrays: one row per (stratum, group) pair.
    # Note: `group` is rebound here from a column label to an array.
    groups_ = np.unique(groups)
    stratum = np.repeat(strata, len(groups_))
    group = np.tile(groups_, len(strata))

    # Labels
    corner = np.array([['ID', 'Stratum', 'Group']])
    collabels = np.append(corner, np.array([cats.astype(int)]), axis=1)
    rowlabels = np.zeros((len(stratum) + 1, 3), dtype='S20')
    rowlabels[0, :] = corner
    rowlabels[1:, 0] = np.arange(len(stratum))
    rowlabels[1:, 1] = stratum
    rowlabels[1:, 2] = group

    # Build counts table
    core = np.zeros((len(stratum), len(cats)))
    for row, strat in enumerate(stratum):
        for col, cat in enumerate(cats):
            gr = group[row]
            core[row, col] = count_cats(person_strat, groups, scores,
                                        strat, gr, cat)

    # Build Damon object
    counts = {'rowlabels': rowlabels,
              'collabels': collabels,
              'coredata': core,
              'key4rows': 0,
              'rowkeytype': int,
              'key4cols': 0,
              'colkeytype': 'S60',
              'nanval': d.nanval,
              'validchars': ['All', ['All'], 'Num']
              }
    counts = dmn.Damon(counts, 'datadict', 'RCD_dicts_whole', verbose=None)

    # Check that each stratum has sufficient counts, > 1
    for strat in strata:
        cats = counts.extract(counts,
                              getrows={'Get': 'NoneExcept',
                                       'Labels': 'Stratum',
                                       'Rows': [strat]})
        if np.sum(cats['coredata']) <= 1:
            exc = 'Insufficient data for one of the strata for an item.'
            raise dif_stats_Error(exc)

    return counts
def dif_stats(filename,  # [<'my/file.txt',...> => name of scored data file]
              student_id='Student_ID',  # [<'Student_ID', ...> => student id column label]
              group=['Sex', {'focal': 0, 'ref': 1}],  # [<e.g.'Sex', {'focal':'female', 'ref':'male'}]> => column label with assignment to focal and reference]
              raw_score='RawScore',  # [<'RawScore',...> => raw score column label]
              items='All',  # [<'All', ['item1', 'item3',...]> => items for which to get stats]
              stats='All',  # [<'All', [see list in docs]> => desired statistics]
              strata=('all_scores', 4),  # [<'all_scores', int> => number of raw score strata to apply]
              getrows=None,  # [<None, {'Get':_,'Labels':_,'Rows':_}> => select rows using extract() syntax]
              getcols=None,  # [<None, {'Get':_,'Labels':_,'Cols':_}> => select cols using extract() syntax]
              delimiter='\t',  # [<',', '\t'> => column delimiter]
              ):
    """Calculate DIF stats for each in a range of items.

    Returns a Damon object: one row per item, one column per requested
    statistic.  MH_* statistics are computed only for dichotomous
    items (ncats <= 2) and SMD* only for polytomous items; the other
    cells are filled with the nanval.

    NOTE(review): the `group` default is a mutable list; it is read
    but never mutated here, so the shared-default pitfall does not
    bite — still worth confirming callers treat it as read-only.
    """
    # Load data
    d = load_scores(filename=filename,
                    getrows=getrows,
                    getcols=getcols,
                    labelcols=[student_id, group[0]],
                    key4rows=[student_id, 'S60', 'warn_dups'],
                    delimiter=delimiter)

    # Resolve item list; the raw-score column is never treated as an item.
    if items == 'All':
        items = dmnt.getkeys(d, 'Col', 'Core', 'Auto', None)
        items = items[items != raw_score]
    else:
        items = np.array(items)

    if stats == 'All':
        stats = ['MH_alpha', 'MH_dif', 'MH_d-dif', 'MH_var', 'MH_d-var',
                 'MH_z', 'MH_pval', 'MH_chisq', 'MH_chisq_pval',
                 'M_dif', 'M_var', 'M_z', 'M_pval', 'M_chisq',
                 'M_chisq_pval', 'SMD_dif', 'SMD_var', 'SMD_z',
                 'SMD_pval', 'SMD_chisq', 'SMD_chisq_pval', 'SD',
                 'SMD/SD', 'Flag', 'Counts']

    # 'Flag' and 'SMD/SD' are derived statistics: pull in whatever
    # underlying stats they need.
    if 'Flag' in stats:
        flag_stats = ['MH_d-dif', 'MH_var', 'MH_pval', 'SMD_dif', 'SD',
                      'SMD/SD', 'M_chisq_pval']
        for stat in flag_stats:
            if stat not in stats:
                stats.append(stat)

    if 'SMD/SD' in stats:
        smd_sd_stats = ['SMD_dif', 'SD']
        for stat in smd_sd_stats:
            if stat not in stats:
                stats.append(stat)

    # 'Counts' expands into three count columns placed at the front.
    if 'Counts' in stats:
        count_stats = ['Count_Ref', 'Count_Focal', 'Count_All',
                       ]
        for stat in count_stats:
            if stat not in stats:
                stats.insert(0, stat)
        stats.remove('Counts')

    # Initialize DIF table
    corner = np.array([['Item', 'N_Cats']])
    collabels = np.append(corner, np.array([stats]), axis=1)
    rowlabels = np.zeros((len(items) + 1, 2), dtype='S60')
    rowlabels[0, :] = corner[0]
    rowlabels[1:, 0] = np.array(items)
    core = np.zeros((len(items), len(stats)))

    # Get stats for each item
    for i, item in enumerate(items):
        try:
            tab = build_strat_table(loaded=d, item=item,
                                    raw_score=raw_score, group=group[0],
                                    strata=strata[0])
        except (damon1.utils.Damon_Error, dif_stats_Error):
            # Try with backup strata parameter
            try:
                tab = build_strat_table(loaded=d, item=item,
                                        raw_score=raw_score,
                                        group=group[0],
                                        strata=strata[1])
            except (damon1.utils.Damon_Error, dif_stats_Error):
                # Both strata settings failed: fill row with nanval.
                print('Warning in tools.dif_stats(): Unable to build a '
                      'stratification table for: '
                      'stratum=', strata, 'item=', item)
                core[i, :] = d.nanval
                continue

        ncats = np.size(tab.coredata, axis=1)
        continuity_correction = True if ncats == 2 else False
        rowlabels[i + 1, 1] = ncats

        # Flag needed DIF functions
        run_dif_MH = False
        MH_stats = []
        for stat in stats:
            if 'MH_' in stat and ncats <= 2:
                MH_stats.append(stat)
                run_dif_MH = True

        run_dif_M = False
        M_stats = []
        for stat in stats:
            if 'M_' in stat:
                M_stats.append(stat)
                run_dif_M = True

        run_dif_smd = False
        smd_stats = []
        for stat in stats:
            if 'SMD' in stat and ncats > 2:
                smd_stats.append(stat)
                run_dif_smd = True

        run_sd = False
        for stat in stats:
            if 'SD' in stat:
                run_sd = True

        run_counts = False
        for stat in stats:
            if 'Count' in stat:
                run_counts = True

        # Get item standard deviation (over valid responses only)
        stat_ = {}
        if run_sd is True:
            ivals = d.core_col[item]
            item_sd = np.std(ivals[ivals != d.nanval])
            stat_['SD'] = item_sd

        # Get counts of valid responses, overall and per group
        if run_counts is True:
            ivals = d.core_col[item]
            gvals = d.rl_col[group[0]]
            valid = ivals != d.nanval
            stat_['Count_All'] = np.sum(valid)
            stat_['Count_Focal'] = np.sum((valid) &
                                          (gvals == str(group[1]['focal'])))
            stat_['Count_Ref'] = np.sum((valid) &
                                        (gvals == str(group[1]['ref'])))

        # Calculate MH DIF
        if run_dif_MH is True:
            dif_MH_out = dif_MH(tab, group[1]['focal'], group[1]['ref'])
            for stat in MH_stats:
                stat_[stat] = dif_MH_out[stat]

        # Calculate M DIF
        if run_dif_M is True:
            dif_M_out = dif_M(tab, group[1]['focal'], group[1]['ref'],
                              continuity_correction)
            for stat in M_stats:
                stat_[stat] = dif_M_out[stat]

        # Calculate SMD DIF ('SMD/SD' is derived from SMD_dif and SD)
        if run_dif_smd is True:
            dif_smd_out = dif_smd(tab, group[1]['focal'], group[1]['ref'])
            for stat in smd_stats:
                if stat != 'SMD/SD':
                    stat_[stat] = dif_smd_out[stat]
                else:
                    stat_[stat] = dif_smd_out['SMD_dif'] / item_sd

        # Calculate DIF flag (0/1/2, ETS-style thresholds for
        # dichotomous items; SMD/SD + p-value rule for polytomous)
        if 'Flag' in stats:
            if ncats == 2:
                d_dif = np.abs(stat_['MH_d-dif'])
                se = np.sqrt(stat_['MH_var'])
                pval = stat_['MH_pval']
                z_crit = (d_dif - 1.0) / se
                if d_dif > 1.5 and z_crit > 1.645:
                    stat_['Flag'] = 2
                elif d_dif < 1.0 or pval > 0.05:
                    stat_['Flag'] = 0
                else:
                    stat_['Flag'] = 1
            else:
                smd_sd = np.abs(stat_['SMD/SD'])
                p_val = stat_['M_chisq_pval']
                if smd_sd > 0.25 and p_val < 0.05:
                    stat_['Flag'] = 2
                else:
                    stat_['Flag'] = 0

        # Populate table (nanval where the stat doesn't apply)
        for j, stat in enumerate(stats):
            if 'MH_' in stat and ncats > 2:
                core[i, j] = d.nanval
            elif 'SMD' in stat and ncats <= 2:
                core[i, j] = d.nanval
            else:
                core[i, j] = stat_[stat]

    # Build table
    tab_dict = {'rowlabels': rowlabels,
                'collabels': collabels,
                'coredata': core,
                'key4rows': 0,
                'rowkeytype': 'S60',
                'key4cols': 0,
                'colkeytype': 'S60',
                'nanval': d.nanval,
                'validchars': ['All', ['All'], 'Num']
                }
    tab_obj = dmn.Damon(tab_dict, 'datadict', 'RCD_dicts_whole',
                        verbose=None)

    return tab_obj