def mod_database(input_scale, prefix, start, stop):
    """Replicate the item table per index, recode/rename, and report missing items.

    For every ``i`` in ``range(start, stop)`` a copy of ``input_scale`` is
    made in which ``time`` is ``prefix + str(i)`` and every column listed in
    ``gb.columns_to_use`` is prefixed with the same tag.  The copies are
    stacked, the rows whose ``items`` exist in the active SPSS file are passed
    to ``recode`` (submitted to SPSS), and every row with a non-null
    ``rename`` triggers a ``RENAME VARIABLES`` command.

    Returns:
        tuple: ``(db, vars_not_in_file)`` where ``db`` holds only the rows
        whose ``items`` exist in the file *after* renaming, and
        ``vars_not_in_file`` is the log text listing the remaining items.
    """
    vars_in_file = spssaux.VariableDict().Variables

    frames = {}
    for i in range(start, stop):
        tag = prefix + str(i)
        df = input_scale.reset_index()
        df['time'] = tag
        for col in gb.columns_to_use:
            df[col] = tag + df[col]
        frames[i] = df
    db = pd.concat(frames, ignore_index=True)

    db2 = db[db['items'].isin(vars_in_file)]
    spss.Submit(recode(db2))

    # Build the mask from db2 itself instead of relying on index alignment
    # with the larger frame.
    rename_db = db2.loc[db2['rename'].notnull(), ['items', 'rename']]
    for orgname, new_name in zip(rename_db['items'], rename_db['rename']):
        spss.Submit('RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'.format(
            orgname=orgname, new_name=new_name))

    # The dictionary must be re-read: the renames above changed it.
    vars_in_file = spssaux.VariableDict().Variables
    in_file = db['items'].isin(vars_in_file)
    diff_vars = db.loc[~in_file, 'items'].values.tolist()
    vars_not_in_file = 'Variables not in file\n' + '\n'.join(diff_vars) + '\n'
    print(
        str(len(diff_vars)) +
        ' variables not in file. Check log.txt for specification\n')
    return db[in_file], vars_not_in_file
def expandvarnames(varnames):
    """Return varnames with ALL and TO expansion.

    ``varnames`` may use the construct ``v1, v2, ... ALL`` to coerce the
    order of the leading variables, as well as ``TO`` ranges.

    Args:
        varnames: list of variable names, possibly containing ``TO`` and a
            trailing ``ALL`` (both matched case-insensitively).

    Returns:
        The expanded list.  When neither keyword is present the input object
        is returned unchanged and the SPSS dictionary is not consulted.

    Raises:
        ValueError: if ``ALL`` appears anywhere but in the last position.
    """
    lowered = [item.lower() for item in varnames]
    vardict = None
    allvars = []

    # check for and process ALL name
    if 'all' in lowered:
        vardict = spssaux.VariableDict()
        if lowered.index('all') != len(varnames) - 1:
            raise ValueError(
                _("""ALL must be the last item in the variable list"""))
        allvars = vardict.expand("ALL")
        varnames = varnames[:-1]

    # process TO
    if 'to' in lowered:
        if vardict is None:
            vardict = spssaux.VariableDict()
        varnames = vardict.expand(varnames)

    # Append the ALL result if it was specified.  Order matters, so a plain
    # set union cannot be used; the set only tracks what is already present.
    # Names are compared in lower case on both sides so that "AGE" in the
    # request and "Age" in the dictionary count as the same variable.
    if allvars:
        seen = set(name.lower() for name in varnames)
        for item in allvars:
            key = item.lower()
            if key not in seen:
                seen.add(key)
                varnames.append(item)
    return varnames
def main():
    """Run the extraction pipeline and write the log and syntax files.

    Steps, in order: write the log header, open ``gb.org_data`` in SPSS,
    load the item table from ``gb.input_csv``, expand it with
    ``mod_database``, write the syntax produced by ``prepare_data`` and
    ``extract_vars`` to ``syntax_save``, and append the summary lines to
    ``log``.
    """
    gb.to_log += """data in = {data_in} data out = {data_out}\n\n""".format(
        data_in=gb.org_data, data_out=gb.select_rev)
    with open(log, 'w') as out:
        out.write(gb.to_log)

    spssaux.OpenDataFile(gb.org_data)
    spss.SetOutput("off")

    # DataFrame.from_csv was removed from pandas; this is its documented
    # equivalent (first column as index, dates parsed).
    db = pd.read_csv(gb.input_csv, index_col=0, parse_dates=True)
    db2 = db.loc[db[gb.version].notnull() | db[gb.single_items].notnull()
                 | db['rename'].notnull() | db['recode'].notnull()]

    item_db, to_log2 = mod_database(db2, gb.prefix, gb.start, gb.stop)
    cmd = prepare_data(item_db)
    with open(syntax_save, 'w') as out:
        out.write(cmd)

    list_of_all_vars = item_db['items'].values.tolist()
    vars_in_file = spssaux.VariableDict().Variables
    to_log1 = str(len(list_of_all_vars)) + ' variables extracted.\n'
    print(to_log1)
    with open(log, 'a') as out:
        out.write(to_log1)

    # NOTE(review): item_db is a DataFrame, so iterating it inside how_often
    # walks column labels rather than item names - confirm this is intended.
    how_often(item_db, vars_in_file, gb.prefix, gb.start, gb.stop)
    reverse_items.main(item_db, log, syntax_save)

    cmd = extract_vars(list_of_all_vars, gb.urvalsinfo, gb.select_rev,
                       gb.keep_vars)
    with open(syntax_save, 'a') as out:
        out.write(cmd + '\n')
    # The extraction syntax is only written to file, not submitted.

    with open(log, 'a') as out:
        out.write(to_log2)
    print("--- %s seconds ---" % (time.time() - start_time))
def dicotomize(db):
    """Build RECODE syntax that splits each item at the middle of its scale.

    For every column type in ``columns_to_use`` that equals ``version`` or
    ``single_items``, and for every name in ``db['items']``, the labelled
    values strictly between 0 and 77 are collected from the SPSS dictionary.
    The cut point is half of the highest labelled value (rounded up when the
    span between lowest and highest is even), shifted by the per-item
    modifier stored in the matching cut-modifier column.

    Returns:
        str: the generated syntax, terminated by ``EXECUTE.``.  It is also
        printed.
    """
    sdict = spssaux.VariableDict()
    cmd = ''
    for typ in columns_to_use:
        # single_items is tested first so it still wins if both names match.
        if typ == single_items:
            mod_cut, suffix = single_items_cut_mod, '_single'
        elif typ == version:
            mod_cut, suffix = cut_mod, '_middle'
        else:
            # Unknown column type: there is no modifier column or suffix
            # for it, so do not reuse the ones from a previous iteration.
            continue

        for var in db['items']:
            mod_val = db.loc[db['items'] == var, mod_cut]
            first = mod_val.iloc[0]
            mod = int(first) if pd.notnull(first) else 0

            key_list = [
                int(key) for key in sdict[var].ValueLabels
                if 0 < int(key) < 77
            ]
            min_val, max_val = min(key_list), max(key_list)

            # Parenthesised on purpose: "%" binds tighter than "-", so the
            # unparenthesised form tested max_val - (min_val % 2).
            if (max_val - min_val) % 2 == 1:  # even number of categories
                limit = max_val // 2
            else:  # odd number of categories
                limit = (max_val + 1) // 2

            if limit:
                limit = limit + mod
                cmd += 'RECODE {old_var} (1 thru {low_lim}=0) ({high_lim} thru 76=1) INTO {new_var}.\n'.format(
                    low_lim=limit,
                    high_lim=limit + 1,
                    old_var=var,
                    new_var=var + suffix)
            if 'x6_2occdev1' == var:
                print(var)
    cmd += 'EXECUTE.\n'
    print(cmd)
    return cmd
def reverse_items(db, log, syntax_save):
    """Collect reversal syntax for every item flagged with ``reverse == 1``.

    ``syntax_save`` is accepted but not used here.

    Returns:
        tuple: ``(cmd, n)`` - the concatenated output of ``get_reverse`` for
        the flagged items, and how many items were flagged.
    """
    flagged = db.loc[db['reverse'] == 1, ['items']].values.tolist()
    sdict = spssaux.VariableDict()
    pieces = [get_reverse(entry[0], sdict, log) for entry in flagged]
    return ''.join(pieces), len(flagged)
def display(variables, withvars, stats, matnames, clevel, missing, inclusion):
    """Display pivot table output for regular or split files.

    variables is the main variable list
    withvars is None or a list of variables to correlate with
    stats is the result structure; each element supplies variable, ns,
        corrs, cis and (for split files) splitvars
    matnames is accepted but not used here
    clevel is the confidence level
    missing is listwise or pairwise
    inclusion is include or exclude for user missing values
    """
    spss.StartProcedure(_("Correlations"), "CICORRELATIONS")
    # The procedure block must be closed even if table construction fails;
    # otherwise the failure leaves the procedure open.
    try:
        tbl = spss.BasePivotTable(_("Correlations"), "CICORRELATIONS")
        tbl.Caption(
            _("""Missing value handling: %s, %s. C.I. Level: %s""") %
            (missing, inclusion, clevel))

        rowsplits = [
            tbl.Append(spss.Dimension.Place.row, v)
            for v in spss.GetSplitVariableNames()
        ]
        nsplitvars = len(rowsplits)

        # Row dimensions are appended for their effect on the table; the
        # returned handles are not needed afterwards.
        tbl.Append(spss.Dimension.Place.row, _("Variable"))
        tbl.Append(spss.Dimension.Place.row, _("Variable2"))
        vlist = withvars or variables
        col1 = tbl.Append(spss.Dimension.Place.column, _("Statistic"))
        tbl.SetCategories(col1, [
            CellText.String(_("Correlation")),
            CellText.String(_("Count")),
            CellText.String(_("Lower C.I.")),
            CellText.String(_("Upper C.I.")),
            CellText.String(_("Notes"))
        ])

        vardict = spssaux.VariableDict()
        # With a WITH list, the statistics for the with-variables follow
        # those of the main variables, hence the offset.
        offset = len(variables) if withvars is not None else 0
        for s in stats:
            for i, vv in enumerate(vlist):
                j = i + offset
                if nsplitvars > 0:
                    rows = copy.copy(s.splitvars)
                else:
                    rows = []
                rows.append(
                    CellText.VarName(
                        vardict[s.variable.toString().rstrip()].VariableIndex))
                rows.append(CellText.VarName(vardict[vv].VariableIndex))

                count = s.ns[j].toNumber()
                if count > 10:
                    note = ""
                elif count <= 3:
                    note = _("Some items not computed")
                else:
                    note = _("Normality assumption is not accurate")
                statsi = [
                    s.corrs[j], s.ns[j], s.cis[j][0], s.cis[j][1],
                    CellText.String(note)
                ]
                tbl.SetCellsByRow(rows, statsi)
    finally:
        spss.EndProcedure()
def impute_item_for_item():
    """Build MULTIPLE IMPUTATION syntax, one run per item and gender value.

    For each labelled value of ``gb.gender`` a filtered copy of the dataset
    ``orginal`` is created.  For each non-null entry of ``db_glob['items']``
    the prefixed variables ``gb.prefix + str(i) + item`` (``i`` in
    ``range(gb.start, gb.stop)``) that exist in the file are imputed
    together, constrained to the range of their labelled non-missing values,
    and saved to ``<item>_<gender>.sav``.

    Returns:
        str: the generated syntax (nothing is submitted here).
    """

    def _as_number(code):
        # Label keys are compared with numbers below; convert them once so
        # min/max are numeric rather than lexicographic.
        number = float(code)
        return int(number) if number.is_integer() else number

    cmd = ''
    sdict = spssaux.VariableDict()
    for value, gender in sdict[gb.gender].ValueLabels.items():
        # One command per line: SPSS starts a new command only at a line
        # start, so the commands must not share a line.
        cmd += '\n'.join([
            'DATASET ACTIVATE orginal.',
            'DATASET COPY {gender}.',
            'DATASET ACTIVATE {gender}.',
            'FILTER OFF.',
            'USE ALL.',
            'SELECT IF ({gender_var} = {val}).',
            'EXECUTE.',
            '',
        ]).format(val=value, gender=gender, gender_var=gb.gender)

        for var in db_glob['items'].loc[db_glob['items'].notnull()]:
            try:
                candidates = [
                    gb.prefix + str(i) + var
                    for i in range(gb.start, gb.stop)
                ]
                var_list = [name for name in candidates if name in sdict]
            except (TypeError, AttributeError):
                # Not a usable item name: report it and move on instead of
                # continuing with the previous item's variable list.
                print(var)
                print(type(var))
                continue
            if not var_list:
                continue

            target = var + '_' + gender
            cmd += 'DATASET DECLARE {var}.\n'.format(var=target)
            cmd += 'MULTIPLE IMPUTATION {all_vars} \n'.format(
                all_vars=' '.join(var_list))
            cmd += ' /IMPUTE METHOD=FCS MAXITER= 10 NIMPUTATIONS=25 SCALEMODEL=LINEAR INTERACTIONS=NONE \n SINGULAR=1E-012 MAXPCTMISSING=NONE \n'
            for prefixed_var in var_list:
                vals = [
                    _as_number(val)
                    for val in sdict[prefixed_var].ValueLabels
                    if val not in gb.missing
                ]
                low, high = min(vals), max(vals)
                if low < 1 or high > 10:
                    print('warning: \nmin = {min} \nmax = {max}'.format(
                        min=low, max=high))
                cmd += ' /CONSTRAINTS {prefixed_var}( MIN={min} MAX={max} RND=1)\n'.format(
                    prefixed_var=prefixed_var, min=low, max=high)
            cmd += ' /MISSINGSUMMARIES NONE\n'
            cmd += ' /IMPUTATIONSUMMARIES MODELS\n'
            cmd += ' /OUTFILE IMPUTATIONS={var} .\n'.format(var=target)
            cmd += 'DATASET ACTIVATE {var}.\n'.format(var=target)
            cmd += 'SAVE OUTFILE = "{var}.sav"\n'.format(var=target)
            cmd += ' /KEEP Imputation_ {ID} {gender_var} {vars}\n'.format(
                vars=' '.join(var_list), gender_var=gb.gender, ID=gb.id)
            cmd += ' /COMPRESSED.\n'
            # Return to the gender subset before the next item is imputed.
            cmd += 'DATASET ACTIVATE {gender}.\n'.format(gender=gender)
    return cmd
def dolabels(variables=None,
             varpattern=None,
             lblvars=None,
             lblpattern=None,
             execute=True,
             varsperpass=20,
             syntax=None):
    """Execute STATS VALLBLS FROMDATA.

    Resolves the variables to label and the label variables (by explicit
    list or by pattern), validates them, aggregates the data in passes of
    ``varsperpass`` variables, and then generates the labelling syntax.  The
    syntax is submitted when ``execute`` is true and written out via
    ``writesyntax`` when a ``syntax`` file was given.

    Raises:
        ValueError: for an spssaux module without ``caseless`` support, an
            empty or mismatched selection, non-string label variables, or
            an unnamed active dataset.
    """
    try:
        vardict = spssaux.VariableDict(caseless=True)
    except:
        raise ValueError(
            _("""This command requires a newer version the spssaux module. \n It can be obtained from the SPSS Community website (www.ibm.com/developerworks/spssdevcentral)"""
              ))

    varstolabel = resolve(vardict,
                          _("variables to label"),
                          variables,
                          varpattern,
                          stringonly=False)
    labelvars = resolve(vardict,
                        _("label variables"),
                        lblvars,
                        lblpattern,
                        stringonly=True)

    target_count = len(varstolabel)
    label_count = len(labelvars)
    if target_count == 0 or label_count == 0:
        raise ValueError(
            _("""No variables to label or no labelling variables were specified. If a pattern was used, it may not have matched any variables."""
              ))
    if label_count > 1 and label_count != target_count:
        raise ValueError(
            _("The number of label variables is different from the number of variables to label"
              ))
    # The smallest VariableType being 0 means a label variable is not a string.
    label_types = [vardict[name].VariableType for name in labelvars]
    if min(label_types) == 0:
        raise ValueError(_("""The label variables must all have type string"""))

    dsname = spss.ActiveDataset()
    if dsname == "*":
        raise ValueError(
            _("""The active dataset must have a dataset name in order to use this procedure"""
              ))

    if syntax:
        syntax = FileHandles().resolve(syntax.replace("\\", "/"))

    mkvl = Mkvls(varstolabel, labelvars, varsperpass, execute, syntax,
                 vardict)
    activate = """DATASET ACTIVATE %s""" % dsname
    # One aggregation pass per block of varsperpass variables, each started
    # from the original dataset.
    for first in range(0, target_count, varsperpass):
        spss.Submit(activate)
        mkvl.doaggr(first)
    spss.Submit(activate)

    labelsyntax = mkvl.dolabels()
    if labelsyntax and execute:
        spss.Submit(labelsyntax)
    mkvl.report(labelsyntax)
    if labelsyntax and syntax:
        writesyntax(labelsyntax, syntax, mkvl)
def reverse_items():
    """Return reversal syntax for the items each row of ``df`` marks as reversed.

    Column 2 of a row holds a comma-separated list of 1-based positions;
    each position selects an entry among the item columns (index 5 onwards)
    and that entry is passed to ``get_reverse``.
    """
    sdict = spssaux.VariableDict()
    pieces = []
    for row in df.values.tolist():
        positions_field = row[2]
        if not pd.notnull(positions_field):
            continue
        item_columns = row[5:]
        positions = [int(token) for token in positions_field.split(",")]
        to_reverse = [item_columns[pos - 1] for pos in positions]
        pieces.extend(get_reverse(name, sdict) for name in to_reverse)
    return ''.join(pieces)
def mod_database(input_scale, prefix, start, stop):
    """Stack one prefixed copy of ``input_scale`` per index and keep known rows.

    Each copy gets ``time = prefix + str(i)`` and has the same tag prepended
    to its ``indep`` column.  Only rows whose ``indep`` names a variable in
    the active SPSS file are returned.
    """
    vars_in_file = spssaux.VariableDict().Variables

    frames = []
    for i in range(start, stop):
        tag = prefix + str(i)
        frame = input_scale.reset_index()
        frame['time'] = tag
        frame["indep"] = tag + frame["indep"]
        frames.append(frame)

    stacked = pd.concat(frames, ignore_index=True)
    known = stacked['indep'].isin(vars_in_file)
    return stacked[known]
def Run(args):
    """Execute the SPSSINC ANON extension command"""
    # The payload is stored under a single top-level key.
    first_key = list(args.keys())[0]
    args = args[first_key]

    options = "OPTIONS"
    save = "SAVE"
    templates = [
        Template("VARIABLES",
                 subc="",
                 ktype="existingvarlist",
                 var="varnames",
                 islist=True),
        Template("SVALUEROOT", subc=options, ktype="literal",
                 var="svalueroot"),
        Template("NAMEROOT", subc=options, ktype="varname", var="nameroot"),
        Template("METHOD",
                 subc=options,
                 ktype="str",
                 var="method",
                 vallist=['random', 'sequential', 'transform']),
        Template("SEED", subc=options, ktype="float", var="seed"),
        Template("OFFSET", subc=options, ktype="float", var="offset"),
        Template("SCALE", subc=options, ktype="float", var="scale"),
        Template("MAXRVALUE",
                 subc=options,
                 ktype="int",
                 var="maxrvalue",
                 islist=True),
        Template("ONETOONE",
                 subc=options,
                 ktype="existingvarlist",
                 var="onetoone",
                 islist=True),
        Template("MAPPING", subc=options, ktype="literal", var="mapping"),
        Template("NAMEMAPPING", subc=save, ktype="literal",
                 var="namemapping"),
        Template("VALUEMAPPING",
                 subc=save,
                 ktype="literal",
                 var="valuemapping"),
        Template("IGNORETHIS", subc=save, ktype="bool", var="ignorethis"),
        Template("HELP", subc="", ktype="bool"),
    ]
    oobj = Syntax(templates)

    # A HELP subcommand overrides all else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, anon, vardict=spssaux.VariableDict())
def add_suffix(folder):
    """Suffix every variable with its file's base name and save a copy.

    Processes the ``.sav`` files found by ``get_filelist`` in
    ``<folder>/no_strings`` and ``<folder>/strings``.  In each file all
    variables except the id variable are renamed to ``<name>_<basename>``
    and the result is saved under the sub-folder ``suffix/`` with the same
    file name.
    """
    exclude = ['kod_id']  # name of the id variable; it is never renamed

    for item in ["/no_strings", "/strings"]:
        for file in get_filelist(folder + item, 'sav'):
            print(file)
            spssaux.OpenDataFile(file)

            # splitext removes only the extension.  str.strip('.sav') would
            # strip any of the characters '.', 's', 'a', 'v' from both ends
            # (e.g. "sales.sav" -> "le").
            basename = os.path.splitext(os.path.basename(file))[0]
            suffix = basename  # desired suffix
            print(basename)

            vardict = spssaux.VariableDict()
            names = [name for name in vardict.variables if name not in exclude]
            oldnames = vardict.expand(names)
            newnames = [varnam + "_" + suffix for varnam in oldnames]
            if oldnames:
                # An empty "(=)" specification is not valid syntax.
                spss.Submit('rename variables (%s=%s).' %
                            ('\n'.join(oldnames), '\n'.join(newnames)))

            # Submitted as separate commands: SPSS starts a new command only
            # at a line start.
            spss.Submit([
                'SAVE OUTFILE = "%s%s".' %
                (folder + item + '/suffix/', basename + '.sav'),
                'DATASET CLOSE ALL.',
                'NEW FILE.',
            ])
def fix_files(folder, datevars, id_variable, date):
    """Normalise variable types and missing values in every ``.sav`` file.

    For each file found by ``get_filelist(folder, "sav")``: variables are
    renamed via ``rename_vars``, the id variable is made numeric, a
    ``time_days`` variable is computed when ``AnswerDate`` exists, and every
    variable that is neither a string variable, a date variable nor a
    "mean" variable is converted to F8, right aligned, scale level, with
    system-missing recoded to the user-missing code 999 (id excluded).  The
    file is then sorted by id, stamped with ``data_from_week`` and saved
    with the syntax returned by ``save_and_close``.

    Args:
        folder: directory that holds the data files.
        datevars: names of date variables to leave untouched.
        id_variable: name of the case identifier.
        date: text inserted into ``DATE.MDY(...)`` as the reference date.
    """
    for fil in get_filelist(folder, "sav"):
        spssaux.OpenDataFile(fil)
        rename_vars(spssaux.VariableDict().variables)
        spss.Submit("ALTER TYPE %s (f8)." % id_variable)

        stringvars = get_stringvars()
        print(stringvars)
        # Re-read the dictionary: names may have changed above.
        vars_in_file = spssaux.VariableDict().variables

        # splitext copes with file names that contain extra dots (or none);
        # unpacking split('.') raises ValueError for those.
        base = os.path.splitext(os.path.basename(fil))[0]

        exclude = [var for var in stringvars if var in vars_in_file]
        has_time_days = "time_days" in vars_in_file
        if "AnswerDate" in vars_in_file:
            spss.Submit([
                'COMPUTE time_days = DATEDIFF(AnswerDate, DATE.MDY(%s), "day").'
                % date,
                'EXECUTE.',
            ])
            has_time_days = True

        untouched = set(exclude) | set(datevars)
        for var in vars_in_file:
            if 'mean' in var or var in untouched:
                continue
            spss.Submit("ALTER TYPE %s (f8)." % var)
            spss.Submit("VARIABLE ALIGNMENT %s(right)." % var)
            spss.Submit("VARIABLE LEVEL %s(scale)." % var)
            if var != id_variable:
                spss.Submit("RECODE %s (SYSMIS=999)." % var)
                spss.Submit("MISSING VALUES %s (999)." % var)

        spss.Submit("SORT CASES BY %s (A)." % id_variable)
        # One command per list element: SPSS starts a new command only at a
        # line start.
        finishing = [
            'COMPUTE data_from_week=%s.' % base,
            'EXECUTE.',
            'ALTER TYPE data_from_week (f8).',
        ]
        if has_time_days:
            # Only valid when the variable exists in this file.
            finishing.append('ALTER TYPE time_days (f8).')
        spss.Submit(finishing)

        spss.Submit(save_and_close(base, exclude, folder))
        spss.Submit("NEW FILE.")
def compute_means():
    """Build COMPUTE syntax for the scale mean described by each row of ``df``.

    Row layout as read here: column 0 is the scale name, column 1 the
    minimum number of valid items for ``mean.N``, column 2 a comma-separated
    list of 1-based positions of reversed items, column 4 the variable label
    and columns 5 onwards the item names.  Items that are null or absent
    from the active file are left out; a mean is only computed when more
    than one item remains.

    Returns:
        str: the generated syntax.
    """
    cmd = ''
    sdict = spssaux.VariableDict()
    for row in df.values.tolist():
        # Keep each item's position among the item columns: the reverse list
        # refers to those positions, not to positions in the filtered list.
        kept = [(pos, name) for pos, name in enumerate(row[5:])
                if pd.notnull(name) and name in sdict]
        items = [name for _pos, name in kept]
        print(items)
        if len(items) > 1:
            if pd.notnull(row[2]):
                rev_positions = set(int(x) - 1 for x in row[2].split(","))
                items = [
                    name + '_rev' if pos in rev_positions else name
                    for pos, name in kept
                ]
            cmd += 'COMPUTE {var}_mean=mean.{min_mean}({items}).\n'.format(
                var=row[0], min_mean=row[1], items=','.join(items))
            cmd += 'Execute.\n'
            cmd += "VARIABLE LABELS {var}_mean '{lable}'.\n".format(
                var=row[0], lable=row[4])
    return cmd
def Run(args):
    """Execute the PROPOR command"""
    debug = False
    if debug:
        print(args)

    # The keys of args are the names of the subcommands that were given;
    # the payload sits under the first one.
    first_key = list(args.keys())[0]
    args = args[first_key]
    if debug:
        print(args)

    # define the syntax
    templates = [
        Template("NUM", subc="", ktype="str", islist=True),
        Template("DENOM", subc="", ktype="str", islist=True),
        Template("ID", subc="", ktype="existingvarlist", islist=False),
        Template("HELP", subc="", ktype="bool"),
        Template("NAME", subc="DATASET", var="dsname", ktype="varname"),
        Template("ALPHA",
                 subc="LEVEL",
                 ktype="float",
                 vallist=(.0000000001, .99999999999)),
    ]
    oobj = Syntax(templates)

    # A HELP subcommand overrides all else
    if "HELP" in args:
        print(helptext)
        return

    try:
        # parse and execute the command
        oobj.parsecmd(args, vardict=spssaux.VariableDict())
        dopropor(**oobj.parsedparams)
    except:
        # As an extension command, only the message is shown; the exception
        # is not propagated and no traceback appears unless debug is on.
        if debug:
            raise
        print(sys.exc_info()[1])
def how_often(var_list, vars_in_file, prefix, start, stop):
    """Count items whose value labels differ between the prefixed variables.

    For each name in ``var_list`` the value labels of every existing
    variable ``prefix + str(i) + name`` (``i`` in ``range(start, stop)``)
    are collected with the codes '77', '88' and '99' removed, and compared
    with ``checkEqual1``.  Names that fail the comparison are printed; the
    total is printed and appended to ``log``.
    """
    sdict = spssaux.VariableDict()
    ignored_codes = ['77', '88', '99']
    differing = 0

    for var in var_list:
        label_sets = []
        for index in range(start, stop):
            variable = prefix + str(index) + var
            if variable not in vars_in_file:
                continue
            labels = sdict[variable].ValueLabels
            for code in ignored_codes:
                if code in labels:
                    del labels[code]
            label_sets.append(labels)

        if checkEqual1(label_sets):
            continue
        differing += 1
        print(var)

    to_log = str(
        differing) + ' variables have different number of response categories.\n'
    print(to_log)
    with open(log, 'a') as out:
        out.write(to_log)
def plots(yvars,
          xvars,
          color=None,
          size=None,
          shape=None,
          label=None,
          linear=False,
          quadratic=False,
          cubic=False,
          loess=False,
          ignore=False,
          title="",
          categorical="bars",
          group=1,
          boxplots=False,
          hexbin=False,
          applyfitto="total",
          indent=15,
          yscale=75,
          pagex=None,
          pagey=None):
    """Create plots per specification described in help above.

    A pivot table summarising the legend settings is produced first.  Then,
    for every y variable and every group of x variables yielded by
    ``xgen(xvars, group)``, a list of chart statements is assembled from the
    module-level templates and submitted with ``spss.Submit``.

    yvars, xvars are lists of variable names; every y must have a scale
        measurement level
    color, size, shape, label are optional variable names used as chart
        aesthetics; shape must not be a scale variable
    linear, quadratic, cubic, loess select the fit lines to draw
    ignore is accepted but not referenced in this function
    title is an optional title, also appended to the legend table title
    categorical selects the element used for non-scale x variables
        ("bars", "boxplot", or another key of elementmap)
    group is the number of x variables per chart; values above 1 disable
        boxplots
    hexbin selects the hexbin scatter element instead of the plain one
    applyfitto is "total" or "group"; forced to "total" when there is no
        color variable or the color variable is scale
    indent, yscale are passed to scaling() for multi-part charts
    pagex, pagey must be given together or not at all

    Raises ValueError for a half-specified page size, a non-scale y
    variable, or a scale shape variable.

    NOTE(review): SOURCE indentation was lost; the nesting below is
    reconstructed from the statement order - confirm against the original.
    """

    npage = [pagex, pagey].count(None)  # 0 means both specified
    if npage == 1:
        raise ValueError(
            _("Page specification must include both x and y sizes"))
    if group > 1:
        boxplots = False
    spssweight = spss.GetWeightVar()
    if not spssweight:
        spssweight = None
    vardict = spssaux.VariableDict()

    # display pivot table of legend information
    fits = []
    for i, fittype in enumerate([linear, quadratic, cubic, loess]):
        if fittype:
            fits.append(fittypetable[i])
    spss.StartProcedure("STATS REGRESS", _("Relationship Plots"))
    ttitle = _("Chart Legend Information")
    if title:
        ttitle = ttitle + "\n" + title
    tbl = spss.BasePivotTable(
        ttitle,
        "CHARTLEGENDINFO",
        caption=
        _("Legend Settings for the charts that follow. Some settings do not apply to categorical charts."
          ))
    tbl.SimplePivotTable(_("Settings"),
                         rowlabels=[
                             _("Color by"),
                             _("Size by"),
                             _("Shape by"),
                             _("Label by"),
                             _("Fit Lines")
                         ],
                         collabels=[_("Value")],
                         cells=[
                             labelit(color, vardict) or "---",
                             labelit(size, vardict) or "---",
                             labelit(shape, vardict) or "---",
                             labelit(label, vardict) or "---",
                             "\n".join(fits) or "---"
                         ])
    spss.EndProcedure()

    # group fitlines only available for categorically defined groups
    if not color or (color and vardict[color].VariableLevel == "scale"):
        applyfitto = "total"
    # every variable that must be read in addition to x and y
    aesthetics = set([
        item for item in [color, size, shape, label, spssweight]
        if not item is None
    ])

    for y in yvars:
        yobj = vardict[y]
        if yobj.VariableLevel != "scale":
            raise ValueError(
                _("Y variables must have a scale measurement level: %s") % y)
        yvarlabel = yobj.VariableLabel or y

        # construct one possibly multi-part chart for each numcharts variables
        for xpart in xgen(xvars, group):
            first = True  # only the first part of a chart gets a y axis
            cmd = []
            numcharts = len(xpart)
            mostvariables = " ".join(
                set(xpart +
                    list(aesthetics)))  # eliminate duplicates (except with y)
            if spssweight:
                options = ", weight(%s)" % spssweight
            else:
                options = ""
            cmd.append(ggraphtemplate % {
                "allvars": y + " " + mostvariables,
                "options": options
            })
            indentx = indent
            if npage == 0:  # page specs were given
                if numcharts < group:  # short row
                    # shrink the page width in proportion to the missing
                    # charts and rescale the indent accordingly
                    shortpagex = pagex * indent / 100. + pagex * (
                        100. - indent) / 100. * (float(numcharts) / group)
                    indentx = indent * (pagex / shortpagex)
                    cmd.append(pagestarttemplate % {
                        "pagex": shortpagex,
                        "pagey": pagey
                    })
                else:
                    cmd.append(pagestarttemplate % {
                        "pagex": pagex,
                        "pagey": pagey
                    })
            cmd.append(datatemplate % {"varname": y, "unitcategory": ""})
            # lower-cased names already declared; passed to gendata for
            # every further variable
            alldatastatements = set([y.lower()])
            if spssweight:
                cmd.append(gendata(spssweight, vardict, alldatastatements))

            # loop over one or more x variables for this chart
            for currentn, x in enumerate(xpart):
                xobj = vardict[x]
                ml = xobj.VariableLevel
                if numcharts > 1:
                    cmd.append(
                        graphstarttemplate % {
                            "originandscale":
                            scaling(numcharts, currentn, indentx, yscale)
                        })
                if boxplots and ml == "scale":
                    cmd.append(
                        graphstarttemplate %
                        {"originandscale": "origin(15%, 10%), scale(75%,75%)"})
                if ml == "scale":  # build scatterplot specs
                    uc = ""
                    options = ""
                    if size:
                        options = options + ", size(%s)" % size
                        cmd.append(gendata(size, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "size"})
                    if color:
                        options = options + ", color.exterior(%s)" % color
                        cmd.append(gendata(color, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "color.exterior"})
                    if shape:
                        if vardict[shape].VariableLevel == "scale":
                            raise ValueError(
                                _("The shape variable must be categorical: %s")
                                % shape)
                        options = options + ", shape(%s)" % shape
                        cmd.append(gendata(shape, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "shape"})
                else:
                    uc = iscat
                    if categorical == "bars":
                        cmd.append(include0)
                if not first:
                    other = ", null()"
                else:
                    other = ""
                if title and numcharts == 1 and not boxplots:
                    cmd.append(titletemplate % {"title": title})
                cmd.append(gendata(x, vardict, alldatastatements))
                if label:
                    cmd.append(gendata(label, vardict, alldatastatements))
                cmd.append(guidetemplate % {
                    "dim": 1,
                    "varlabel": xobj.VariableLabel or x,
                    "other": ""
                })
                if first:
                    cmd.append(guidetemplate % {
                        "dim": 2,
                        "varlabel": yvarlabel,
                        "other": other
                    })
                else:
                    cmd.append(noyaxis)
                if ml == "scale":
                    if label:
                        # NOTE(review): the doubled ")" is only balanced if
                        # the element template leaves a parenthesis open -
                        # confirm against scatterelement/hexbinscatterelement.
                        options = options + ", label(%s))" % label
                    if hexbin:
                        cmd.append(hexbinscatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    else:
                        cmd.append(scatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    for i, fittype in enumerate(
                        [linear, quadratic, cubic, loess]):
                        if fittype:
                            if applyfitto == "group":
                                colorspec = ", color(%s)" % color
                            else:
                                colorspec = ""
                            if numcharts > 1:
                                cmd.append(aesth % {"atype": "color"})
                            cmd.append(fitlineelement % \
                                {"fittype": fittypekwd[i], "y": y, "x": x, "lineshape" : lineshapes[i], "color" : colorspec})
                    if boxplots:  # bordered boxplot if single variable chart
                        # close the scatterplot graph, then add a boxplot of
                        # x in one graph and of y (transposed) in another
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(15%, 0%), scale(75%,8%)"
                        })
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append("""COORD: rect(dim(1))""")
                        cmd.append(oneboxplotelement % {"variable": x})
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(92%, 10%), scale(8%, 75%)"
                        })
                        cmd.append("COORD: transpose(rect(dim(1)))")
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append(oneboxplotelement % {"variable": y})
                        cmd.append(graphendtemplate)
                else:
                    if categorical != "boxplot":
                        cmd.append(categoricalelement % {
                            "etype": elementmap[categorical],
                            "y": y,
                            "x": x
                        })
                    else:
                        if label:
                            options = ", label(%s)" % label
                        else:
                            options = ""
                        cmd.append(boxplotelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                first = False
                if numcharts > 1:
                    cmd.append(graphendtemplate)
            if npage == 0:
                cmd.append(pageendtemplate)
            cmd.append(endgpl)
            spss.Submit(cmd)
def weightedkappaextension(variables, wttype=1, cilevel=95):
    """Compute weighted kappa for two rating variables and display it.

    variables is the variable list (expanded with expandvarnames); exactly
        two valid variables are required
    wttype is 1 (linear weights) or 2 (quadratic weights); any other value
        is reset to 1 with a warning
    cilevel is the confidence level in percent; it is clamped to the range
        50 to 99.999 with a warning

    Invalid input and unusable data (fewer than two complete cases, a rater
    with constant ratings, non-integer ratings, ratings below 1) are
    reported in a "Warnings" pivot table instead of raising.  The work is
    done on a hidden copy of the active dataset; the finally block closes
    the temporary datasets, erases the temporary files and issues RESTORE.

    NOTE(review): SOURCE indentation was lost; the nesting below (in
    particular inside the finally block) is reconstructed from the statement
    order - confirm against the original.
    """
    varnames = expandvarnames(variables)
    # NOTE(review): varnames[1] is read before the two-variable check below,
    # so a single-variable list fails here with IndexError.
    caption = varnames[0] + _(" vs. ") + varnames[1]
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(
            _("""An invalid variable has been specified. This command is not executed."""
              ))
        spss.EndProcedure()
    elif len(varnames) != 2:
        spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(
            _("""Exactly two variables must be specified. This command is not executed."""
              ))
        spss.EndProcedure()
    else:
        try:
            # --- validate and normalise the options -----------------------
            warntext = []
            if cilevel < 50:
                warntext.append(
                    _("CILEVEL cannot be less than 50%. It has been set to 50%."
                      ))
                cilevel = 50
            if cilevel > 99.999:
                warntext.append(
                    _("CILEVEL cannot be greater than 99.999%. It has been set to 99.999%."
                      ))
                cilevel = 99.999
            if cilevel == int(cilevel):
                cilevel = int(cilevel)  # so the labels show e.g. 95, not 95.0
            if wttype != 1:
                if wttype != 2:
                    warntext.append(
                        _("WTTYPE must be 1 or 2. It has been set to 1."))
                    wttype = 1
            varlist = varnames[0] + ' ' + varnames[1]

            # --- session setup ---------------------------------------------
            spss.Submit("PRESERVE.")
            tempdir = tempfile.gettempdir()
            spss.Submit("""CD "%s".""" % tempdir)
            wtvar = spss.GetWeightVar()
            if wtvar != None:
                spss.Submit(r""" COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
                spss.Submit(r""" EXECUTE.""")
            maxloops = 2 * spss.GetCaseCount()
            spss.Submit("""SET PRINTBACK=OFF MPRINT=OFF MXLOOPS=%s.""" %
                        maxloops)
            activeds = spss.ActiveDataset()
            if activeds == "*":
                # the active dataset needs a name so it can be re-activated
                activeds = "D" + str(random.uniform(.1, 1))
                spss.Submit("DATASET NAME %s" % activeds)

            # random names for temporary variables, datasets, OMS tags, files
            tmpvar1 = "V" + str(random.uniform(.1, 1))
            tmpvar2 = "V" + str(random.uniform(.1, 1))
            tmpvar3 = "V" + str(random.uniform(.1, 1))
            tmpvar4 = "V" + str(random.uniform(.1, 1))
            tmpvar5 = "V" + str(random.uniform(.1, 1))
            tmpvar6 = "V" + str(random.uniform(.1, 1))
            tmpdata1 = "D" + str(random.uniform(.1, 1))
            tmpdata2 = "D" + str(random.uniform(.1, 1))
            omstag1 = "T" + str(random.uniform(.1, 1))
            omstag2 = "T" + str(random.uniform(.1, 1))
            omstag3 = "T" + str(random.uniform(.1, 1))
            omstag4 = "T" + str(random.uniform(.1, 1))
            omstag5 = "T" + str(random.uniform(.1, 1))
            omstag6 = "T" + str(random.uniform(.1, 1))
            tmpfile1 = "F" + str(random.uniform(.1, 1))
            tmpfile2 = "F" + str(random.uniform(.1, 1))
            lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
            upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel

            # --- work on a hidden copy of the data -------------------------
            spss.Submit(r""" DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
            spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)
            filt = spssaux.GetSHOW("FILTER", olang="english")
            if filt != "No case filter is in effect":
                # NOTE(review): str.strip removes any of the characters
                # "(FILTER)" from both ends, not the literal suffix; a
                # condition that begins or ends with one of those characters
                # would be truncated - confirm.
                filtcond = filt.strip("(FILTER)")
                select = "SELECT IF " + str(filtcond) + "."
                spss.Submit("""%s""" % select)
                spss.Submit("""EXECUTE.""")
                spss.Submit("""USE ALL.""")
            banana = spssaux.getDatasetInfo(Info="SplitFile")
            if banana != "":
                warntext.append(_("This procedure ignores split file status."))
                spss.Submit(r"""SPLIT FILE OFF.""")

            # keep only cases with no missing value on either variable
            spss.Submit(r""" COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
            spss.Submit(r""" SELECT IF %s=0.""" % tmpvar1)
            spss.Submit(r""" EXECUTE.""")
            validn = spss.GetCaseCount()
            if validn < 2:
                spss.Submit(
                    r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                    % omstag1)
                spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
                table = spss.BasePivotTable("Warnings ", "Warnings")
                table.Append(spss.Dimension.Place.row,
                             "rowdim",
                             hideLabels=True)
                rowLabel = CellText.String("1")
                table[(rowLabel, )] = CellText.String(
                    _("""There are too few complete cases. This command is not executed."""
                      ))
                spss.EndProcedure()
                spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag1)
            else:
                # standard deviation of each rater, added as the last two
                # variables, to detect constant ratings
                spss.Submit(
                    r""" AGGREGATE /OUTFILE=* MODE=ADDVARIABLES /%s=SD(%s) /%s=SD(%s)."""
                    % (tmpvar2, varnames[0], tmpvar3, varnames[1]))
                try:
                    cur = spss.Cursor(isBinary=False)
                except:
                    # fall back for spss modules without the isBinary keyword
                    cur = spss.Cursor()
                datarow = cur.fetchone()
                cur.close()
                sd1 = datarow[-2]
                sd2 = datarow[-1]
                if min(sd1, sd2) == 0:
                    spss.Submit(
                        r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                        % omstag1)
                    spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
                    table = spss.BasePivotTable("Warnings ", "Warnings")
                    table.Append(spss.Dimension.Place.row,
                                 "rowdim",
                                 hideLabels=True)
                    rowLabel = CellText.String("1")
                    table[(rowLabel, )] = CellText.String(
                        _("""All ratings are the same for at least one rater. This command is not executed."""
                          ))
                    spss.EndProcedure()
                    spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag1)
                else:
                    # show the accumulated option warnings (one to three)
                    if len(warntext) > 0:
                        spss.Submit(
                            r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                            % omstag1)
                        if len(warntext) == 1:
                            spss.StartProcedure(_("Weighted Kappa"),
                                                "Weighted Kappa")
                            table = spss.BasePivotTable(
                                "Warnings ", "Warnings")
                            table.Append(spss.Dimension.Place.row,
                                         "rowdim",
                                         hideLabels=True)
                            rowLabel = CellText.String("1")
                            table[(rowLabel, )] = CellText.String("%s" %
                                                                  warntext[0])
                            spss.EndProcedure()
                        if len(warntext) == 2:
                            spss.StartProcedure(_("Weighted Kappa"),
                                                "Weighted Kappa")
                            table = spss.BasePivotTable(
                                "Warnings ", "Warnings")
                            table.Append(spss.Dimension.Place.row,
                                         "rowdim",
                                         hideLabels=True)
                            rowLabel = CellText.String("1")
                            table[(rowLabel, )] = CellText.String(
                                "%s \n"
                                "%s" % (warntext[0], warntext[1]))
                            spss.EndProcedure()
                        if len(warntext) == 3:
                            spss.StartProcedure(_("Weighted Kappa"),
                                                "Weighted Kappa")
                            table = spss.BasePivotTable(
                                "Warnings ", "Warnings")
                            table.Append(spss.Dimension.Place.row,
                                         "rowdim",
                                         hideLabels=True)
                            rowLabel = CellText.String("1")
                            table[(rowLabel, )] = CellText.String(
                                "%s \n"
                                "%s \n"
                                "%s" % (warntext[0], warntext[1], warntext[2]))
                            spss.EndProcedure()
                        spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag1)

                    # cross-classification counts go to tmpfile1
                    spss.Submit(r""" DELETE VARIABLES %s %s.""" %
                                (tmpvar2, tmpvar3))
                    spss.Submit(
                        r""" AGGREGATE /OUTFILE=%s /BREAK=%s /%s=N.""" %
                        (tmpfile1, varlist, tmpvar4))
                    spss.Submit(
                        r""" OMS /SELECT ALL EXCEPT=WARNINGS /IF COMMANDS=['Variables to Cases'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                        % omstag2)
                    # stack both raters into one variable to list all
                    # rating categories in use
                    spss.Submit(r""" VARSTOCASES /MAKE %s FROM %s.""" %
                                (tmpvar5, varlist))
                    spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag2)
                    catdata = []
                    try:
                        cur = spss.Cursor(isBinary=False)
                    except:
                        cur = spss.Cursor()
                    while True:
                        datarow = cur.fetchone()
                        if datarow is None:
                            break
                        catdata.append(datarow[-1])
                    cur.close()
                    cats = list(set(catdata))
                    cattest = 0  # set to 1 when a non-integer rating exists
                    if any(item != round(item) for item in cats):
                        cattest = 1
                        spss.Submit(
                            r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                            % omstag1)
                        spss.StartProcedure(_("Weighted Kappa"),
                                            "Weighted Kappa")
                        table = spss.BasePivotTable("Warnings ", "Warnings")
                        table.Append(spss.Dimension.Place.row,
                                     "rowdim",
                                     hideLabels=True)
                        rowLabel = CellText.String("1")
                        table[(rowLabel, )] = CellText.String(
                            _("""Some ratings are not integers. This command is not executed."""
                              ))
                        spss.EndProcedure()
                        spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag1)
                    elif min(cats) < 1.0:
                        spss.Submit(
                            r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                            % omstag1)
                        spss.StartProcedure(_("Weighted Kappa"),
                                            "Weighted Kappa")
                        table = spss.BasePivotTable("Warnings ", "Warnings")
                        table.Append(spss.Dimension.Place.row,
                                     "rowdim",
                                     hideLabels=True)
                        rowLabel = CellText.String("1")
                        table[(rowLabel, )] = CellText.String(
                            _("""Some ratings are less than 1. This command is not executed."""
                              ))
                        spss.EndProcedure()
                        spss.Submit(r""" OMSEND TAG = ['"%s"'].""" % omstag1)
                    else:
                        # category list goes to tmpfile2; MATRIX reads both
                        # files and saves the statistics to tmpdata2
                        spss.Submit(
                            r""" AGGREGATE /OUTFILE=%s /BREAK=%s /%s=N.""" %
                            (tmpfile2, tmpvar5, tmpvar6))
                        spss.Submit(r""" DATASET DECLARE %s WINDOW=HIDDEN""" %
                                    tmpdata2)
                        spss.Submit(
                            r""" OMS /SELECT ALL EXCEPT=WARNINGS /IF COMMANDS=['Matrix'] /DESTINATION VIEWER=NO /TAG='"%s"'."""
                            % omstag3)
                        spss.Submit(
                            r""" MATRIX. GET x /FILE=%s /VARIABLES=%s %s. GET ratecats /FILE=%s /VARIABLES=%s. COMPUTE size=MMAX(ratecats). COMPUTE y=MAKE(size,size,0). LOOP i=1 to NROW(y). + LOOP j=1 to NCOL(y). + LOOP k=1 to NROW(x). + DO IF (x(k,1)=i and x(k,2)=j). + COMPUTE y(i,j)=x(k,3). + END IF. + END LOOP. + END LOOP. END LOOP. COMPUTE wttype=%s. COMPUTE wt=MAKE(NROW(y),NCOL(y),0). LOOP i=1 to NROW(y). + LOOP j=1 to NCOL(y). + DO IF wttype=1. + COMPUTE wt(i,j)=1-(ABS(i-j)/(size-1)). + ELSE IF wttype=2. + COMPUTE wt(i,j)=1-((i-j)/(NROW(y)-1))**2. + END IF. + END LOOP. END LOOP. COMPUTE n=MSUM(y). COMPUTE prop=y/n. COMPUTE p_i=RSUM(prop). COMPUTE p_j=CSUM(prop). COMPUTE w_i=(wt*T(p_j))*MAKE(1,size,1). COMPUTE w_j=MAKE(size,1,1)*(T(p_i)*wt). COMPUTE po=MSUM(wt&*prop). COMPUTE pe=MSUM(MDIAG(p_i)*wt*MDIAG(p_j)). COMPUTE kstat=(po-pe)/(1-pe). COMPUTE var0=(T(p_i)*((wt-(w_i+w_j))&**2)*T(p_j)-pe**2)/(n*(1-pe)**2). DO IF var0>=0. + COMPUTE ase0=SQRT(var0). ELSE. + COMPUTE ase0=-1. END IF. DO IF ase0>0. + COMPUTE z=kstat/ase0. + COMPUTE sig=1-CHICDF(z**2,1). ELSE. + COMPUTE z=-1. + COMPUTE sig=-1. END IF. COMPUTE var1=(MSUM((prop&*((wt-(w_i+w_j)&*(1-kstat))&**2)))-(kstat-pe*(1-kstat))**2)/(n*(1-pe)**2). DO IF var1>=0. + COMPUTE ase1=SQRT(var1). ELSE. + COMPUTE ase1=-1. END IF. SAVE {wttype,kstat,ase1,z,sig,ase0} /OUTFILE=%s /VARIABLES=wttype,kstat,ase1,z,sig,ase0. END MATRIX."""
                            % (tmpfile1, varlist, tmpvar4, tmpfile2, tmpvar5,
                               wttype, tmpdata2))
                        spss.Submit(r""" OMSEND TAG=['"%s"'].""" % omstag3)
                        spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" %
                                    tmpdata2)
                        # -1 is the marker written by the MATRIX program for
                        # values that could not be computed
                        spss.Submit(
                            r""" DO IF ase0=-1. + RECODE z sig (-1=SYSMIS). END IF. EXECUTE. DELETE VARIABLES ase0. RECODE ase1 (-1=SYSMIS). COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase1."""
                            % cilevel)
                        spss.Submit(
                            r""" COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase1."""
                            % cilevel)
                        spss.Submit(
                            r""" FORMATS kstat ase1 z sig lower upper (F11.3). VARIABLE LABELS kstat %s."""
                            % _smartquote(_("""Kappa""")))
                        spss.Submit(
                            r""" VARIABLE LABELS ase1 %s.""" %
                            _smartquote(_("""Asymptotic Standard Error""")))
                        spss.Submit(r""" VARIABLE LABELS z %s.""" %
                                    _smartquote(_("""Z""")))
                        spss.Submit(r""" VARIABLE LABELS sig %s. """ %
                                    _smartquote(_("""P Value""")))
                        spss.Submit(r""" VARIABLE LABELS lower %s. """ %
                                    _smartquote(_(lowlabel)))
                        spss.Submit(r""" VARIABLE LABELS upper %s. """ %
                                    _smartquote(_(upplabel)))
                        if wttype == 1:
                            spss.Submit(r""" VARIABLE LABELS wttype %s.""" %
                                        _smartquote(_("""Linear""")))
                        if wttype == 2:
                            spss.Submit(r""" VARIABLE LABELS wttype %s.""" %
                                        _smartquote(_("""Quadratic""")))
                        spss.Submit(r""" EXECUTE. """)
                        # omstag4/omstag5 stay open until the finally block
                        spss.Submit(
                            r""" OMS /SELECT TABLES /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                            % omstag4)
                        spss.Submit(
                            r""" OMS /SELECT TEXTS /IF COMMANDS=['Weighted Kappa'] LABELS=['Active Dataset'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                            % omstag5)
                        if len(warntext) > 0:
                            spss.Submit(
                                r""" OMS /SELECT HEADINGS /IF COMMANDS=['Weighted Kappa'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
                                % omstag6)
                        try:
                            cur = spss.Cursor(isBinary=False)
                        except:
                            cur = spss.Cursor()
                        data = cur.fetchone()
                        cur.close()
                        # variable 0 (wttype) labels the row; variables 1-6
                        # supply the column labels and the cell values
                        spss.StartProcedure(_("Weighted Kappa"),
                                            "Weighted Kappa")
                        table = spss.BasePivotTable(_("Weighted Kappa"),
                                                    "Kappa",
                                                    caption=caption)
                        table.SimplePivotTable(rowdim = _("Weighting"),
                            rowlabels = [CellText.String(spss.GetVariableLabel(0))],
                            coldim = "",
                            collabels = [spss.GetVariableLabel(1),spss.GetVariableLabel(2),spss.GetVariableLabel(3),spss.GetVariableLabel(4), \
                                spss.GetVariableLabel(5),spss.GetVariableLabel(6)],
                            cells = [data[1],data[2],data[3],data[4],data[5],data[6]])
                        spss.EndProcedure()
                        if len(warntext) > 0:
                            spss.Submit(r""" OMSEND TAG = ['"%s"'].""" %
                                        omstag6)
        finally:
            # Best-effort cleanup.  validn, sd1, sd2, cattest and cats are
            # only bound if the try block got far enough; a NameError raised
            # here is swallowed by the bare except below.
            try:
                spss.Submit(r""" DATASET CLOSE %s.""" % tmpdata1)
                spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" %
                            activeds)
                if validn >= 2:
                    if min(sd1, sd2) > 0:
                        if cattest == 0:
                            if min(cats) >= 1:
                                spss.Submit(
                                    r""" OMSEND TAG=['"%s"' '"%s"'].""" %
                                    (omstag4, omstag5))
                                spss.Submit(r""" DATASET CLOSE %s.""" %
                                            tmpdata2)
                                spss.Submit(r""" ERASE FILE=%s.""" % tmpfile2)
                        # NOTE(review): placed at this level because tmpfile1
                        # is written whenever both SDs are positive - confirm.
                        spss.Submit(r""" ERASE FILE=%s.""" % tmpfile1)
            except:
                pass
            spss.Submit(r""" RESTORE. """)
def _fleisskappa_warning(message):
    """Show ``message`` in a one-cell "Warnings" pivot table for Fleiss Kappa."""
    spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
    table = spss.BasePivotTable("Warnings ", "Warnings")
    table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
    rowLabel = CellText.String("1")
    table[(rowLabel, )] = CellText.String(message)
    spss.EndProcedure()


def _fleisskappa_cursor():
    """Open a data cursor, asking for ``isBinary=False`` and falling back to defaults."""
    try:
        return spss.Cursor(isBinary=False)
    except Exception:
        # NOTE(review): presumably some spss module versions reject the
        # isBinary keyword -- confirm which exception they raise.
        return spss.Cursor()


def fleisskappaextension(variables, cilevel=95):
    """Compute Fleiss' kappa for the rater variables and show it as pivot tables.

    variables is the list of rater variable names; it is passed through
    expandvarnames() before use.
    cilevel is the confidence level in percent.  Values below 50 or above
    99.999 are reset to the nearest bound and reported in a Warnings table.

    Nothing is returned; all results are written to the SPSS output.  The
    function stops with a Warnings table when a variable is invalid, fewer
    than two variables are given, there are fewer complete cases than
    max(number of categories, number of raters), or all ratings are equal.

    The work is done on a hidden copy of the active dataset.  Temporary
    datasets and files are removed on a best-effort basis in the ``finally``
    clause, and settings saved with PRESERVE are put back with RESTORE.
    """
    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _fleisskappa_warning(
            _("""An invalid variable has been specified. This command is not executed."""))
        return
    if len(varnames) < 2:
        _fleisskappa_warning(
            _("""At least two variables must be specified. This command is not executed."""))
        return

    # OMS request that keeps the procedure's Notes table out of the Viewer.
    suppress_notes = r""" OMS /SELECT TABLES /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes'] /DESTINATION VIEWER=NO /TAG = '"%s"'."""
    end_oms = r""" OMSEND TAG = ['"%s"']."""

    try:
        warntext = []
        if cilevel < 50:
            warntext.append(
                _("CILEVEL cannot be less than 50%. It has been reset to 50%."))
            cilevel = 50
        if cilevel > 99.999:
            warntext.append(
                _("CILEVEL cannot be greater than 99.999%. It has been reset to 99.999%."))
            cilevel = 99.999
        if cilevel == int(cilevel):
            # show "95" rather than "95.0" in labels and generated syntax
            cilevel = int(cilevel)
        varlist = ' '.join(varnames)

        spss.Submit("PRESERVE.")
        tempdir = tempfile.gettempdir()
        spss.Submit("""CD "%s".""" % tempdir)
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            spss.Submit(r""" COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
            spss.Submit(r""" EXECUTE.""")
            wtdn = GetWeightSum(varnames)
        else:
            wtdn = spss.GetCaseCount()
        maxloops = wtdn + 1
        spss.Submit(
            """SET PRINTBACK=OFF MPRINT=OFF OATTRS=ENG MXLOOPS=%s.""" % maxloops)

        activeds = spss.ActiveDataset()
        if activeds == "*":
            # unnamed dataset: give it a name so it can be re-activated later
            activeds = "D" + str(random.uniform(.1, 1))
            spss.Submit("DATASET NAME %s" % activeds)

        # Random names for temporary variables, files, datasets and OMS tags.
        tmpvar1 = "V" + str(random.uniform(.1, 1))
        tmpvar2 = "V" + str(random.uniform(.1, 1))
        tmpvar3 = "V" + str(random.uniform(.1, 1))
        tmpfile1 = "F" + str(random.uniform(.1, 1))
        tmpfile2 = "F" + str(random.uniform(.1, 1))
        tmpdata1 = "D" + str(random.uniform(.1, 1))
        tmpdata2 = "D" + str(random.uniform(.1, 1))
        tmpdata3 = "D" + str(random.uniform(.1, 1))
        omstag1 = "T" + str(random.uniform(.1, 1))
        omstag2 = "T" + str(random.uniform(.1, 1))
        omstag3 = "T" + str(random.uniform(.1, 1))
        omstag4 = "T" + str(random.uniform(.1, 1))
        omstag5 = "T" + str(random.uniform(.1, 1))
        omstag6 = "T" + str(random.uniform(.1, 1))
        lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
        upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel

        spss.Submit(r""" DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
        spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)

        # Turn an active filter into a permanent selection on the copy.
        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            # Remove only the trailing "(FILTER)" marker.  str.strip("(FILTER)")
            # treats its argument as a character set and would also eat leading
            # letters or parentheses of the filter expression itself.
            filtcond = filt
            marker = "(FILTER)"
            if filtcond.endswith(marker):
                filtcond = filtcond[:-len(marker)]
            spss.Submit("SELECT IF " + str(filtcond) + ".")
            spss.Submit("""EXECUTE.""")
            spss.Submit("""USE ALL.""")
        banana = spssaux.getDatasetInfo(Info="SplitFile")
        if banana != "":
            warntext.append(_("This command ignores split file status."))
            spss.Submit(r"""SPLIT FILE OFF.""")

        # Keep only cases with no missing rating.
        spss.Submit(r""" COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
        spss.Submit(r""" SELECT IF %s=0.""" % tmpvar1)
        spss.Submit(r""" EXECUTE. MISSING VALUES ALL ().""")
        validn = spss.GetCaseCount()
        if wtvar is None:
            spss.Submit(r""" SAVE OUTFILE=%s.""" % tmpfile1)
        else:
            # write each case once per unit of (rounded) weight
            spss.Submit(r""" DO IF %s >= 1.""" % wtvar)
            spss.Submit(r""" + LOOP #i=1 TO %s.""" % wtvar)
            spss.Submit(r""" XSAVE OUTFILE=%s /KEEP=%s /DROP=%s.""" %
                        (tmpfile1, varlist, wtvar))
            spss.Submit(r""" + END LOOP. END IF. EXECUTE. """)

        # Stack the ratings into one column to find the distinct categories.
        spss.Submit(r""" OMS /SELECT ALL EXCEPT=WARNINGS /IF COMMANDS=['Variables to Cases'] /DESTINATION VIEWER=NO /TAG = '"%s"'.""" % omstag1)
        spss.Submit(r""" VARSTOCASES /MAKE %s FROM %s.""" % (tmpvar2, varlist))
        spss.Submit(end_oms % omstag1)
        catdata = []
        cur = _fleisskappa_cursor()
        while True:
            datarow = cur.fetchone()
            if datarow is None:
                break
            catdata.append(datarow[-1])
        cur.close()
        cats = list(set(catdata))
        ncats = len(cats)
        nraters = len(varnames)
        neededn = max(ncats, nraters)

        if validn < neededn or ncats < 2:
            if validn < neededn:
                message = _("""There are too few complete cases. This command is not executed.""")
            else:
                message = _("""All ratings are the same. This command is not executed.""")
            spss.Submit(suppress_notes % omstag2)
            _fleisskappa_warning(message)
            spss.Submit(end_oms % omstag2)
            return  # the finally clause below still performs the cleanup

        if warntext:
            spss.Submit(suppress_notes % omstag2)
            # at most two warnings can accumulate (CILEVEL reset, split file)
            _fleisskappa_warning(" \n".join(warntext))
            spss.Submit(end_oms % omstag2)

        spss.Submit(r""" AGGREGATE /OUTFILE=%s /BREAK=%s /%s=N.""" %
                    (tmpfile2, tmpvar2, tmpvar3))
        spss.Submit(r""" DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata2)
        spss.Submit(r""" DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata3)
        spss.Submit(r""" OMS /SELECT ALL EXCEPT=WARNINGS /IF COMMANDS=['Matrix'] /DESTINATION VIEWER=NO /TAG = '"%s"'.""" % omstag3)
        # MATRIX program: overall statistics go to tmpdata2, per-category
        # statistics to tmpdata3.  Adjacent literals concatenate to one string.
        matrix_program = (
            r" MATRIX. "
            r"GET x /FILE=%s /VARIABLES=%s. "
            r"GET ratecats /FILE=%s /VARIABLES=%s. "
            r"COMPUTE n=NROW(x). "
            r"COMPUTE c=NROW(ratecats). "
            r"COMPUTE y=MAKE(n,c,0). "
            r"LOOP i=1 to n. "
            r"+ LOOP j=1 to NCOL(x). "
            r"+ LOOP k=1 to c. "
            r"+ DO IF x(i,j)=ratecats(k). "
            r"+ COMPUTE y(i,k)=y(i,k)+1. "
            r"+ END IF. "
            r"+ END LOOP. "
            r"+ END LOOP. "
            r"END LOOP. "
            r"COMPUTE k=NCOL(x). "
            r"COMPUTE pe=MSUM((CSUM(y)/MSUM(y))&**2). "
            r"COMPUTE pa=MSSQ(y)/(NROW(y)*k*(k-1))-(1/(k-1)). "
            r"COMPUTE kstat=(pa-pe)/(1-pe). "
            r"COMPUTE cp=(CSSQ(y)-CSUM(y))&/((k-1)&*CSUM(y)). "
            r"COMPUTE pj=CSUM(y)/MSUM(y). "
            r"COMPUTE one=MAKE(1,NCOL(pj),1). "
            r"COMPUTE qj=one-pj. "
            r"COMPUTE kj=(cp-pj)&/qj. "
            r"COMPUTE num=2*((pj*t(qj))**2-MSUM(pj&*qj&*(qj-pj))). "
            r"COMPUTE den=n*k*(k-1)*((pj*t(qj))**2). "
            r"COMPUTE ase=SQRT(num/den). "
            r"COMPUTE z=kstat/ase. "
            r"COMPUTE sig=1-CHICDF(z**2,1). "
            r"SAVE {kstat,ase,z,sig} /OUTFILE=%s /VARIABLES=kstat,ase,z,sig. "
            r"COMPUTE asej=MAKE(1,c,SQRT(2/(n*k*(k-1)))). "
            r"COMPUTE zj=kj&/asej. "
            r"COMPUTE sigj=one-CHICDF(zj&**2,1). "
            r"SAVE {ratecats,t(cp),t(kj),t(asej),t(zj),t(sigj)} /OUTFILE=%s /VARIABLES=category,cp,kstat,ase,z,sig. "
            r"END MATRIX.")
        spss.Submit(matrix_program %
                    (tmpfile1, varlist, tmpfile2, tmpvar2, tmpdata2, tmpdata3))
        spss.Submit(end_oms % omstag3)

        lower_bound = r""" COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase."""
        upper_bound = r""" COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase."""
        one_label = r""" VARIABLE LABELS %s %s. """
        shared_labels = [
            ("ase", _("""Asymptotic Standard Error""")),
            ("z", _("""Z""")),
            ("sig", _("""P Value""")),
            ("lower", _(lowlabel)),
            ("upper", _(upplabel)),
        ]

        # Overall kappa: add CI bounds and labels, then read the single row.
        spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata2)
        spss.Submit(lower_bound % cilevel)
        spss.Submit(upper_bound % cilevel)
        spss.Submit(r""" FORMATS kstat ase z sig lower upper (F11.3). VARIABLE LABELS kstat %s. """ % _smartquote(_("""Kappa""")))
        for name, text in shared_labels:
            spss.Submit(one_label % (name, _smartquote(text)))
        spss.Submit(r""" EXECUTE. """)
        cur = _fleisskappa_cursor()
        data1 = cur.fetchone()
        cur.close()
        collabels1 = [spss.GetVariableLabel(i) for i in range(6)]
        celldata1 = [data1[i] for i in range(6)]

        # Per-category kappas: same treatment on the second result dataset.
        spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata3)
        spss.Submit(lower_bound % cilevel)
        spss.Submit(upper_bound % cilevel)
        spss.Submit(r""" FORMATS category (F10.0) cp kstat ase z sig lower upper (F11.3). VARIABLE LABELS category %s. """ % _smartquote(_("""Rating Category""")))
        category_labels = [
            ("cp", _("""Conditional Probability""")),
            ("kstat", _("""Kappa""")),
        ] + shared_labels
        for name, text in category_labels:
            spss.Submit(one_label % (name, _smartquote(text)))
        spss.Submit(r""" EXECUTE.""")

        spss.Submit(suppress_notes % omstag4)
        spss.Submit(r""" OMS /SELECT TEXTS /IF COMMANDS=['Fleiss Kappa'] LABELS=['Active Dataset'] /DESTINATION VIEWER=NO /TAG = '"%s"'.""" % omstag5)
        if warntext:
            # the procedure heading was already shown with the Warnings table
            spss.Submit(r""" OMS /SELECT HEADINGS /IF COMMANDS=['Fleiss Kappa'] /DESTINATION VIEWER=NO /TAG = '"%s"'.""" % omstag6)

        rlabels = []
        data2 = []
        cur = _fleisskappa_cursor()
        for _case in range(spss.GetCaseCount()):
            datarow = cur.fetchone()
            data2.append(datarow[1:])
            rlabels.append(datarow[0])
        cur.close()

        def _flatten(seq):
            for item in seq:
                if spssaux._isseq(item):
                    for subitem in _flatten(item):
                        yield subitem
                else:
                    yield item

        data2 = list(_flatten(data2))

        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
        table1 = spss.BasePivotTable(_("Overall Kappa"), "Overall Kappa")
        table1.SimplePivotTable(rowdim=_(""),
                                rowlabels=[CellText.String("Overall")],
                                coldim="",
                                collabels=collabels1,
                                cells=celldata1)
        if any(item != round(item) for item in rlabels):
            caption = _(
                "Non-integer rating category values are truncated for presentation.")
        else:
            caption = ""
        table2 = spss.BasePivotTable(
            _("Kappas for Individual Categories"),
            _("Individual Category Kappa Statistics"),
            caption=caption)
        rowlabels = [CellText.String("{:>9.0f}".format(value))
                     for value in rlabels]
        collabels = [spss.GetVariableLabel(i) for i in range(1, 8)]
        table2.SimplePivotTable(rowdim=_(" Rating Category"),
                                rowlabels=rowlabels,
                                coldim="",
                                collabels=collabels,
                                cells=data2)
        spss.EndProcedure()
        if warntext:
            spss.Submit(end_oms % omstag6)
    finally:
        # Best-effort cleanup: names may be undefined if an early step failed,
        # so any error here is deliberately ignored.
        try:
            spss.Submit(""" DATASET CLOSE %s.""" % tmpdata1)
            spss.Submit(r""" DATASET ACTIVATE %s WINDOW=ASIS.""" % activeds)
            if validn >= neededn and ncats >= 2:
                # these only exist when the statistics were actually computed
                spss.Submit(""" OMSEND TAG=['"%s"' '"%s"'].""" %
                            (omstag4, omstag5))
                spss.Submit(""" DATASET CLOSE %s.""" % tmpdata2)
                spss.Submit(""" DATASET CLOSE %s.""" % tmpdata3)
                spss.Submit(r""" ERASE FILE=%s.""" % tmpfile2)
            # tmpfile1 is written before the validity checks, so always remove it
            spss.Submit(""" ERASE FILE=%s.""" % tmpfile1)
        except Exception:
            pass
        spss.Submit(""" RESTORE. """)
def recode(varlist,
           recodes,
           stringsize=None,
           makevaluelabels=True,
           copyvariablelabels=True,
           useinputvallabels=False,
           suffix="",
           prefix=""):
    """Submit a RECODE ... INTO for varlist and optionally label the outputs.

    varlist is parsed by parsevarlist() into input names, output names and a
    variable type.
    recodes must hold exactly one recode specification string.
    stringsize, when given, is the width used to create (STRING) or resize
    (ALTER TYPE) the output variables before recoding.
    makevaluelabels requests a VALUE LABELS command for the outputs;
    useinputvallabels builds those labels from the first input variable's
    value labels.
    copyvariablelabels copies each input variable label to its output,
    wrapped in prefix and suffix.
    """
    vardict = spssaux.VariableDict(caseless=True)
    isutf8 = spss.PyInvokeSpss.IsUTF8mode()
    ecutf8 = codecs.getencoder("utf_8")

    inputlist, outputlist, vartype = parsevarlist(varlist, vardict)
    if len(recodes) > 1:
        raise ValueError(
            _("The RECODES subcommand must consist of a single, quoted specification"
              ))

    # recodespec: list of textual recode syntax, one entry per value set
    # vldefs:     dict keyed by target value, holding the input codes
    # inputdict:  dict keyed by target value, holding a list of input codes
    recodespec, vldefs, inputdict = parserecodes(recodes[0], vartype,
                                                 stringsize)
    valuelabelmessage = checklabelconsistency(inputlist, vardict)

    if stringsize:
        to_alter = []
        to_create = []
        for outname in outputlist:
            try:
                needs_resize = vardict[outname].VariableType != stringsize
            except:
                # lookup failed: the output variable does not exist yet
                to_create.append(outname)
            else:
                if needs_resize:
                    to_alter.append(outname)
        if to_create:
            spss.Submit("STRING %s (A%s)." % (" ".join(to_create), stringsize))
        if to_alter:
            spss.Submit("ALTER TYPE %s (A%s)" %
                        (" ".join(to_alter), stringsize))

    input_names = " ".join(inputlist)
    output_names = " ".join(outputlist)
    spss.Submit("""RECODE %s %s INTO %s.""" %
                (input_names, " ".join(recodespec), output_names))

    # variable labels, when requested
    if copyvariablelabels:
        if prefix and not prefix.endswith(" "):
            prefix += " "
        if suffix and not suffix.startswith(" "):
            suffix = " " + suffix
        for source_var, target_var in zip(inputlist, outputlist):
            new_label = prefix + vardict[source_var].VariableLabel + suffix
            spss.Submit("""VARIABLE LABEL %s %s.""" %
                        (target_var, _smartquote(new_label, True)))

    # Value labels, when requested.  All input values mapped to one target
    # are merged; the ELSE clause is omitted.  When copying value labels, the
    # first input variable is the source.
    if makevaluelabels:
        if useinputvallabels:
            vldefs = makevallabels(vldefs, inputdict,
                                   vardict[inputlist[0]].ValueLabels, isutf8,
                                   ecutf8)
        # a COPY target must not produce a value label
        copy_targets = {target for target in vldefs
                        if target.lower() == "copy"}
        for target in copy_targets:
            del vldefs[target]
        label_pairs = [val + " " + _smartquote(label, True)
                       for val, label in vldefs.items()]
        spss.Submit(r"""VALUE LABELS %s %s.""" %
                    (output_names, " ".join(label_pairs)))

    if valuelabelmessage:
        print(valuelabelmessage)
def buildspec(dims, dss, catvars, totvars, encoding, finalweight):
    """create raking specification and return control variable list and totals list

    dims is a list of dimension variables, categories, and totals
    dss, catvars, and totvars are alternative ways of specifying the same information
    dss is a list of dataset names, catvars a list of category variable names, and
    totvars a list of the corresponding control totals
    encoding is used to decode a control variable name that is not already a str
    finalweight is the name of the weight variable to be created; it must not exist

    Returns (ctlvars, ctltotals): the control variable names and, in the same
    order, one dict per variable mapping category value to control total.
    Raises ValueError for an existing finalweight, an unknown or nonnumeric
    control variable, a malformed value/total list, a dataset given without
    its variable names, an empty specification, or duplicate control variables.
    """

    vardict = spssaux.VariableDict()
    if finalweight in vardict:
        raise ValueError(_("FINALWEIGHT cannot specify an existing variable name"))
    ctlvars = []
    ctltotals = []
    activedsname = spss.ActiveDataset()
    if activedsname == "*":  # unnamed: name it so it can be re-activated
        activedsname = "D" + str(random.uniform(.1, 1))
        spss.Submit("DATASET NAME %s" % activedsname)

    # Inline specifications: [variable, value, total, value, total, ...]
    for dim in dims:
        if not dim:
            continue
        ctlname = dim[0]
        if isinstance(ctlname, str):
            vvname = ctlname
        else:
            vvname = str(ctlname, encoding)
        if ctlname not in vardict:
            raise ValueError(_("A control total variable does not exist: %s") % vvname)
        if vardict[ctlname].VariableType != 0:
            raise ValueError(_("A nonnumeric variable was specified for a control dimension: %s") % vvname)
        if len(dim) == 1 or len(dim) % 2 != 1:
            raise ValueError(_("An invalid set of values and totals was found for a control dimension: %s") % " ".join(dim))
        ctlvars.append(ctlname)
        try:
            # category totals can be numerical expressions
            # convert to a value after insuring that all numbers are floats
            # SECURITY NOTE(review): eval() runs text taken from the command
            # specification; acceptable only while that input is trusted.
            ctltotals.append({float(cat): float(eval(decimalize(total)))
                              for cat, total in zip(dim[1::2], dim[2::2])})
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C / SystemExit propagate
            raise ValueError(_("""Invalid category or category total for variable: %s""") % vvname) from err

    # Dataset specifications: categories and totals are read from a dataset.
    for i, ds in enumerate(dss):
        catvar = catvars[i]
        totvar = totvars[i]
        if not any([ds, catvar, totvar]):
            continue
        if ds and (catvar is None or totvar is None):
            raise ValueError(_("""A dataset was specified without the category or totals variable names: %s""") % ds)
        try:
            # error conditions include nonexistant dataset and variables and type problems
            spss.Submit("DATASET ACTIVATE %s" % ds)
            dta = spssdata.Spssdata([catvar, totvar], names=False).fetchall()
            ctlvars.append(catvar)
            # A dataset value might be simply numeric or a string expression
            # SECURITY NOTE(review): eval() on dataset contents, as above.
            ctltotals.append({float(cat): float(eval(decimalize(total)))
                              for cat, total in dta})
        finally:
            # always return to the original dataset, error or not
            spss.Submit("DATASET ACTIVATE %s" % activedsname)

    if not ctlvars:
        raise ValueError(_("""No raking specifications were given"""))

    # check for duplicate control variables (names compare case-insensitively)
    ctllc = [name.lower() for name in ctlvars]
    if len(ctllc) != len(set(ctllc)):
        duplicates = sorted({name for name in ctllc if ctllc.count(name) > 1})
        raise ValueError(_("""Duplicate control variables were specified: %s""") % ", ".join(duplicates))
    return ctlvars, ctltotals
def Run(args):
    """Execute the STATS CORRELATIONS extension command"""

    # args holds a single entry keyed by the command name; unwrap it
    command_key = list(args.keys())[0]
    args = args[command_key]

    # syntax definition: one Template per keyword
    templates = [
        Template("VARIABLES", subc="", ktype="existingvarlist",
                 var="variables", islist=True),
        Template("VARIABLES", subc="WITH", ktype="existingvarlist",
                 var="withvars", islist=True),
        Template("CONFLEVEL", subc="OPTIONS", ktype="float", var="clevel",
                 vallist=(25., 99.999)),
        Template("METHOD", subc="OPTIONS", ktype="str", var="method",
                 vallist=("fisher", "bootstrap")),
        Template("LISTWISE", subc="MISSING", ktype="bool", var="listwise"),
        Template("PAIRWISE", subc="MISSING", ktype="bool", var="pairwise"),
        Template("INCLUDE", subc="MISSING", ktype="bool", var="include"),
        Template("EXCLUDE", subc="MISSING", ktype="bool", var="exclude"),
        Template("HELP", subc="", ktype="bool"),
    ]
    oobj = Syntax(templates)

    # make sure the localization function exists; fall back to identity
    global _
    try:
        _("---")
    except:
        def _(msg):
            return msg

    # HELP takes precedence over everything else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, docorr, vardict=spssaux.VariableDict())
import time
start_time = time.time()  # taken before the heavy imports so they are timed too
# Checks availability of variables in dataset and makes new dataset from
# selection variable and list of variables.
from vars_and_functions import global_vars as gb
from vars_and_functions import reverse_items
import spss
import spssaux
import pandas as pd

log = gb.folder + 'out/check_variables_log.txt'
syntax_save = gb.folder + 'out/check_variables.sps'

spssaux.OpenDataFile(gb.org_data)
test = spssaux.VariableDict()
print(test)

vars = ['x4_6aevyt01', 'x4_6aevyt02']
for var in reversed(vars):
    i = test[var].index
    dataCursor = spss.Cursor([i])
    try:
        oneVar = dataCursor.fetchall()
    finally:
        # close inside the loop, even on error, so the next variable can
        # open a new cursor
        dataCursor.close()
    # extending the example to get the actual list of values.
    uniqueList = list(set(oneVar))
    # drop system-missing (None) and the configured missing codes
    uniq_vals = [
        int(x[0]) for x in uniqueList
        if x[0] is not None and x[0] not in gb.missing_int
    ]
    uniq_vals_rev = reversed(uniq_vals)
def Run(args):
    """Execute the STATS REGRESS PLOT command"""

    # args holds a single entry keyed by the command name; unwrap it
    command_key = list(args.keys())[0]
    args = args[command_key]

    # syntax definition, grouped by subcommand
    main_templates = [
        Template("YVARS", subc="", ktype="existingvarlist", var="yvars",
                 islist=True),
        Template("XVARS", subc="", ktype="existingvarlist", var="xvars",
                 islist=True),
        Template("COLOR", subc="", ktype="existingvarlist", var="color"),
        Template("SIZE", subc="", ktype="existingvarlist", var="size"),
        Template("SHAPE", subc="", ktype="existingvarlist", var="shape"),
        Template("LABEL", subc="", ktype="existingvarlist", var="label"),
    ]
    fitline_templates = [
        Template("LINEAR", subc="FITLINES", ktype="bool", var="linear"),
        Template("QUADRATIC", subc="FITLINES", ktype="bool", var="quadratic"),
        Template("CUBIC", subc="FITLINES", ktype="bool", var="cubic"),
        Template("LOESS", subc="FITLINES", ktype="bool", var="loess"),
        Template("IGNORE", subc="FITLINES", ktype="str", var="ignore"),
        Template("APPLYTO", subc="FITLINES", ktype="str", var="applyfitto",
                 vallist=["total", "group"]),
    ]
    option_templates = [
        Template("CATEGORICAL", subc="OPTIONS", ktype="str",
                 var="categorical", vallist=["bars", "lines", "boxplot"]),
        Template("GROUP", subc="OPTIONS", ktype="int", var="group"),
        Template("BOXPLOTS", subc="OPTIONS", ktype="bool", var="boxplots"),
        Template("HEXBIN", subc="OPTIONS", ktype="bool", var="hexbin"),
        Template("TITLE", subc="OPTIONS", ktype="literal", var="title"),
        Template("INDENT", subc="OPTIONS", ktype="int", var="indent",
                 vallist=[0, 50]),
        Template("YSCALE", subc="OPTIONS", ktype="int", var="yscale",
                 vallist=[50, 100]),
        Template("PAGEX", subc="OPTIONS", ktype="float", var="pagex",
                 vallist=[1]),
        Template("PAGEY", subc="OPTIONS", ktype="float", var="pagey",
                 vallist=[1]),
    ]
    help_template = [Template("HELP", subc="", ktype="bool")]
    oobj = Syntax(main_templates + fitline_templates + option_templates +
                  help_template)

    # make sure the localization function exists; fall back to identity
    global _
    try:
        _("---")
    except:
        def _(msg):
            return msg

    # HELP takes precedence over everything else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, plots, vardict=spssaux.VariableDict())
def genVarMacro(variables, countvalues, order, macroname, mincount, minpercent, maxcount, maxpercent, separator, weightvar, missing): """Generate a macro listing the variables in order of the weighted counts variables is the list of candidate variables countvalues is a list of the values to be counted order is a or d for the variable order in the macro macroname is the name of the macro to be generated mincount and minpercent specify minimum thresholds for including a variable (<) maxcount and maxpercent specific maximum thresholds (>=) separator is the variable name separator to use in the macro definition weightvar is the name of the weight variable or None missing specifies the missing value treatment""" if weightvar: varnamesAndWeight = variables + [weightvar] else: varnamesAndWeight = variables nvar = len(variables) if len(separator) == 0: separator = " " vardict = spssaux.VariableDict(variables) types = set(min(v.VariableType, 1) for v in vardict) if len(types) > 1: raise ValueError(_("""Variable must all be of the same type""")) vartypes = types.pop() if vartypes == 0: try: countvalues = [float(v) for v in countvalues] except: raise ValueError(_("""A non-numeric value to count was specified for a numeric variable""")) else: countvalues = [v.rstrip() for v in countvalues] countvalues = set(countvalues) curs = spssdata.Spssdata(indexes=varnamesAndWeight, names=False, convertUserMissing=False, omitmissing=missing == 'exclude') counts = {} # a dictionary of weighted counts with variable names as keys # populate counts as all zeros so that all variables will # appear in the dictionary for later use for v in variables: counts[v] = 0 w = 1.0 wsum = 0 minpercent = minpercent / 100. if maxpercent is not None: maxpercent = maxpercent / 100. 
# calculate weighted count of counted values for each variable # string variables must be trimmed to match counted values list for case in curs: if weightvar: w = case[nvar] if w is None: w = 0.0 wsum += w # accumulate weight if vartypes == 1: case = [val.rstrip() for val in case[:nvar]] # don't include any weight variable for i in range(nvar): if case[i] in countvalues: counts[variables[i]] = counts[variables[i]] + w