def getsplitinfo():
    """Report the active split variables and the split display mode.

    Returns a ``(names, mode)`` pair.  ``names`` is the value reported by
    ``spss.GetSplitVariableNames()``; ``mode`` is ``"layered"`` when the
    English SHOW SPLIT text starts with "layer" (case-insensitively) and
    ``"separate"`` otherwise.  When there are no split variables the
    result is ``([], None)``.
    """

    names = spss.GetSplitVariableNames()
    if len(names) == 0:
        return [], None

    # Ask for the English wording, since the prefix test below is on English text.
    shown = spssaux.getShow("split", olang="english")
    is_layered = shown.lower().startswith("layer")
    return names, ("layered" if is_layered else "separate")
def dooptbinex(target, binvars, suffix=["_bin"], minsize=10, alpha=.05, overwrite=False,
        syntaxoutfile=None, contintervals=10, treetable=True, recodetable=True, execute=True):
    """Execute STATS OPTBINEX command

    For each variable in ``binvars`` a depth-one CHAID TREE is run against
    ``target``; the split values found in the TreeTable are turned into
    transformation syntax by ``getrules`` and accumulated in a syntax file,
    which is then optionally executed via INSERT.

    Parameters:
    target -- name substituted as the TREE dependent variable.
    binvars -- iterable of variable names, each binned in its own TREE run.
    suffix -- sequence of strings; joined and appended to each input name to
        form the output variable name (the default list is never mutated).
    minsize -- value used for MINCHILDSIZE.
    alpha -- value used for ALPHASPLIT.
    overwrite -- when false, raise if an output name already exists
        (compared case-insensitively).
    syntaxoutfile -- file receiving the generated syntax; when None a
        temporary file is used and removed afterwards.
    contintervals -- value used for INTERVALS.
    treetable -- whether the TreeTable is also shown in the Viewer.
    recodetable -- whether to produce the "Variable Binning" pivot table.
    execute -- whether to run the generated syntax.

    Raises ValueError when the Decision Trees option is reported as
    unlicensed, when there is nothing to do (no syntax file and no
    execution), when an output name is longer than 64 characters, or when
    an output name would overwrite an existing variable and ``overwrite``
    is false.
    """

    # Check that TREES procedure is licensed if possible.  This is a
    # best-effort probe: any failure leaves treesavailable True.
    treesavailable = True
    try:
        # no client in external mode
        SpssClient.StartClient()
        treesavailable = SpssClient.IsOptionAvailable(SpssClient.LicenseOption.TREEVIEW)
    except Exception:
        pass
    finally:
        # StopClient must not turn a failed probe into a crash.
        try:
            SpssClient.StopClient()
        except Exception:
            pass
    if not treesavailable:
        raise ValueError(_("""Error: This command requires the Decision Trees option, which is not licensed."""))

    if syntaxoutfile is None and not execute:
        raise ValueError(_("""No syntax output was specified, and execution was not requested. There is nothing to do."""))

    suffix = "".join(suffix)   #tokenlist may come through with mult elements
    outnames = [v + suffix for v in binvars]
    toolong = [v for v in outnames if len(v) > 64]   # wrong metric for Unicode
    if toolong:
        raise ValueError(_("""The following new names exceed 64 bytes. 
Please choose a shorter suffix or rename the input variables:\n%s""") % " ".join(toolong))

    vardict = spssaux.VariableDict()
    if not overwrite:
        existingnames = set(v.lower() for v in vardict.variables)
        onames = existingnames.intersection(v.lower() for v in outnames)
        if onames:
            raise ValueError(_("""Error: The following variables would be overwritten:\n%s""")\
                % " ".join(onames))

    # Random names for the OMS XML workspace/tag and the temporary files.
    xmlws = "x" + str(random.uniform(.1,1))
    visible = "yes" if treetable else "no"
    tempdir = tempfile.gettempdir()
    rulesfilespec = os.path.join(tempdir, "F" + str(random.uniform(.1,1)))

    omstemplate = """set oattrs=eng.
oms select tables
/if subtypes='TreeTable'
/destination format=oxml xmlworkspace="%(xmlws)s" viewer=%(visible)s
/tag = "%(xmlws)s".
"""
    treetemplate = """TREE %(target)s BY %(indvar)s
/TREE DISPLAY=NONE
/PRINT TREETABLE
/GAIN SUMMARYTABLE=NO
/RULES OUTFILE="%(rulesfilespec)s"
/GROWTHLIMIT MAXDEPTH=1 MINPARENTSIZE=10 MINCHILDSIZE=%(minsize)s
/CHAID ALPHASPLIT=%(alpha)s INTERVALS=%(contintervals)s.
"""
    omsargs = {"xmlws": xmlws, "visible": visible}
    treeargs = {"target": target, "rulesfilespec": rulesfilespec, "minsize": minsize,
        "alpha": alpha, "contintervals": contintervals}

    if syntaxoutfile is None:
        insertfile = os.path.join(tempdir, "F" + str(random.uniform(.1,1)))
    else:
        insertfile = syntaxoutfile
    insertfilef = open(insertfile, "wb")

    # run TREES for each independent variable and accumulate resulting tranformations
    oattrs = spssaux.getShow("OATTRS")
    empty = True        # stays True until at least one variable is binned
    failedvars = []
    outvars = []
    outlabels = []
    try:
        for indvar in binvars:
            # set OMS to use language-invariant text
            spss.Submit(omstemplate % omsargs)
            treeargs["indvar"] = indvar
            spss.Submit(treetemplate % treeargs)
            spss.Submit("""omsend tag="%(xmlws)s".""" % omsargs)
            labels = spss.EvaluateXPath(xmlws, "/",
                """//pivotTable//group[@text_eng="Primary Independent Variable"]/category[@text_eng="Split Values"]/cell/@text""")
            if not labels:
                # no split values were found for this variable
                failedvars.append(indvar)
                continue
            empty = False
            outputname = indvar + suffix
            if recodetable:
                outvars.append(outputname)
                outlabels.append(labels)
            definitions = getrules(rulesfilespec, outputname, labels,
                vardict[indvar].VariableLabel)  # also removes temporary file
            insertfilef.writelines([line + "\n" for line in definitions])
    finally:
        insertfilef.close()
        spss.Submit("SET OATTRS=%s" % oattrs)  # restore setting
        spss.DeleteXPathHandle(xmlws)

    if execute and not empty:
        spss.Submit("""INSERT FILE="%s".""" % insertfile)
    if syntaxoutfile is None:
        os.remove(insertfile)

    if failedvars or recodetable:
        from spss import CellText
        StartProcedure(_("Extended Optimal Binning"), "STATSOPTBINEX")
        try:
            if failedvars:
                wtable = spss.BasePivotTable("Warnings ", "Warnings")
                wtable.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
                rowLabel = CellText.String("1")
                wtable[(rowLabel,)] = \
                    spss.CellText.String(_("""These variables could not be binned. New variables were not created for them.\n%s""") % " ".join(failedvars))
            if recodetable and not empty:
                table = spss.BasePivotTable(_("Variable Binning"), "OPTBIN")
                table.Append(spss.Dimension.Place.row, _("Variable"))
                table.Append(spss.Dimension.Place.row, _("Value"))
                table.Append(spss.Dimension.Place.column, _("Definition"), hideName=True)
                reccoldef = CellText.String("Definition")
                # one row per (output variable, bin index) holding the bin's label
                for outvar, varlabels in zip(outvars, outlabels):
                    var = CellText.String(outvar)
                    for j, val in enumerate(varlabels):
                        rec = CellText.String(j)
                        recval = CellText.String(val)
                        table[(var, rec, reccoldef)] = recval
        finally:
            # always close the procedure, even if table construction fails
            spss.EndProcedure()
def arguments(name, dir, mode, proc):
    """Validate the run arguments and read the run's setup file.

    name -- run name; letters, digits, underscore, space and hyphen only.
    dir -- optional directory, optionally wrapped in single or double
        quotes; when empty the SPSS working directory is used.
    mode -- optional; must be "debug" or "test" (case-insensitive).
    proc -- procedure name; "combine" keeps the whole datain value and
        makes dataout mandatory.

    The setup is read from ``<dir>/<name>.set`` and scanned for
    ``datain ...;`` and ``dataout ...;`` statements.  A relative dataout is
    prefixed with the SPSS working directory and then normalized.

    Returns ``[path, datain, dataout, setup]`` (``dataout`` is None when
    the setup has no dataout statement and proc is not "combine").  On any
    validation error, including a missing or unreadable setup file, the
    message is printed and None is returned.
    """

    def fail(message):
        print(message)                                  # print the error message
        return None

    if not name:                                        # name
        return fail("Missing run name.")
    name = name.strip()
    if not re.match(r"^[\w -]+$", name):                # check name
        return fail("Invalid name.")

    cwd = None                                          # queried from SPSS only when needed
    if not dir:
        cwd = spssaux.getShow("DIRECTORY")              # spss current working directory
        path = os.path.join(cwd, name)
    else:                                               # specified directory
        dir = dir.strip()
        m = re.match(r'^"([^"]+)"$', dir) or re.match(r"^'([^']+)'$", dir)
        if m:                                           # remove surrounding quotes
            dir = m.group(1).strip()
        if not re.match(r"^[~\.\\/:\w -]+$", dir):      # check dir
            return fail("Invalid directory.")
        path = os.path.join(os.path.normpath(dir), name)

    if mode:                                            # mode
        if not re.match(r"^(debug|test)$", mode.strip(), flags=re.IGNORECASE):
            return fail("Mode error.")

    try:                                                # get the setup
        with open(path + ".set", "r") as f:
            setup = f.read()
    except IOError:
        # report through the normal error channel instead of raising
        return fail("Missing setup.")
    if not setup:
        return fail("Missing setup.")

    m = re.search(r"(^|;)\s*datain\s+([^;]+);", setup, flags=re.IGNORECASE)   # datain
    if not m:
        return fail("Missing datain.")
    datain = m.group(2).strip()
    if proc != "combine":
        # keep only the first name: quoted (either style) or up to the first space
        m = (re.match(r'^"([^"]+)"', datain)
             or re.match(r"^'([^']+)'", datain)
             or re.match(r"^([^ ]+)", datain))
        if m:
            datain = m.group(1).strip()
    if not re.match(r"^[~\.\\/:\w -]+$", datain):       # check name
        return fail("Invalid datain.")

    m = re.search(r"(^|;)\s*dataout\s+([^;]+);", setup, flags=re.IGNORECASE)  # dataout
    if not m:
        if proc == "combine":
            return fail("Missing dataout.")
        dataout = None                                  # no dataout
    else:
        dataout = m.group(2).strip()
        m = re.match(r"^(.+)\s+(all|con|concat|concatenate)$", dataout,
                     flags=re.IGNORECASE)               # remove all or concatenate
        if m:
            dataout = m.group(1).strip()
        m = re.match(r'^"([^"]+)"$', dataout) or re.match(r"^'([^']+)'$", dataout)
        if m:                                           # remove surrounding quotes
            dataout = m.group(1).strip()
        if not re.match(r"^[~\.\\/:\w -]+$", dataout):  # check name
            return fail("Invalid dataout.")
        if sys.platform.startswith("win"):              # windows
            rooted = re.match(r"^([a-zA-Z]:|[\\/])", dataout)
        else:                                           # linux
            rooted = re.match("^(~|/)", dataout)
        if not rooted:                                  # not full path
            if cwd is None:
                cwd = spssaux.getShow("DIRECTORY")
            dataout = os.path.join(cwd, dataout)        # prefix the current working directory
        dataout = os.path.normpath(dataout)             # normalize the path

    return [path, datain, dataout, setup]
def putdata(name=None, dir=None, mode=None, dataout=None, impl=None, mult=None):
    """Validate the arguments, run the putdata step and submit its output.

    name -- run name: a word character followed by word characters or hyphens.
    dir -- optional directory, optionally quoted; when empty the SPSS
        working directory is used.
    mode -- optional; "debug" or "test" (case-insensitive).
    dataout -- required output name, optionally quoted; a relative name is
        prefixed with the SPSS working directory and then normalized.
    impl, mult -- optional; "all" or a run of digits.

    After validation ``srclib.execute("putdata", path, mode, args)`` is
    called.  On a zero return code the contents of ``<dataout>.out`` are
    submitted to SPSS, and unless a mode was given ``<dataout>.imp`` and
    ``<dataout>.out`` are removed.  Any error message is printed; the
    function always returns None.
    """

    def report(message):
        print(message)                                  # print the error message

    def pathvalue(text):
        """Return the path inside ``text`` (quotes removed) or None if malformed."""
        m = re.match(r'^\s*"([~\.\\/:\w\- ]*)"\s*$', text)      # double quotes
        if m:
            return m.group(1).strip()
        m = re.match(r"^\s*'([~\.\\/:\w\- ]*)'\s*$", text)      # single quotes
        if m:
            return m.group(1).strip()
        # no quotes; anchored at the end so trailing junk is rejected
        # rather than silently truncated
        m = re.match(r"^\s*([~\.\\/:\w\-]*)\s*$", text)
        if m:
            return m.group(1)
        return None

    if not name:                                        # name
        return report("Missing run name.")
    m = re.match(r"^\s*(\w[\w\-]*)\s*$", name)
    if not m:
        return report("Run name error.")
    name = m.group(1)

    cwd = None                                          # queried from SPSS only when needed
    if not dir:
        cwd = spssaux.getShow("DIRECTORY")              # spss current working directory
        path = os.path.join(cwd, name)
    else:                                               # specified directory
        dir = pathvalue(dir)
        if dir is None:
            return report("Invalid directory.")
        path = os.path.join(os.path.normpath(dir), name)

    if mode:                                            # mode
        m = re.match(r"^\s*(debug|test)\s*$", mode, flags=re.IGNORECASE)
        if not m:
            return report("Mode error.")
        mode = m.group(1).lower()

    if not dataout:                                     # dataout
        return report("Missing dataout.")
    dataout = pathvalue(dataout)
    if dataout is None:
        return report("Invalid dataout.")
    if not dataout:
        # e.g. '""' or blanks: would otherwise resolve to the working directory itself
        return report("Missing dataout.")
    if sys.platform.startswith("win"):                  # windows
        rooted = re.match(r"^([a-zA-Z]:|[\\/])", dataout)
    else:                                               # linux
        rooted = re.match("^(~|/)", dataout)
    if not rooted:                                      # not full path
        if cwd is None:
            cwd = spssaux.getShow("DIRECTORY")
        dataout = os.path.join(cwd, dataout)            # prefix the current working directory
    dataout = os.path.normpath(dataout)                 # normalize the path

    if impl:                                            # impl
        m = re.match(r"^\s*(all|\d+)\s*$", impl, flags=re.IGNORECASE)
        if not m:
            return report("Invalid implicate.")
        impl = m.group(1).lower()

    if mult:                                            # mult
        m = re.match(r"^\s*(all|\d+)\s*$", mult, flags=re.IGNORECASE)
        if not m:
            return report("Invalid multiple.")
        mult = m.group(1).lower()

    args = ["/dataout={0}".format(dataout)]             # execute putdata
    if impl:
        args.append("/impl={0}".format(impl))
    if mult:
        args.append("/mult={0}".format(mult))
    rc = srclib.execute("putdata", path, mode, args)
    if rc != 0:
        return report("Abnormal termination of putdata")

    if not os.path.exists(dataout + ".out"):            # write the imputed data
        return report("Missing " + dataout + ".out file")
    with open(dataout + ".out", "r") as f:
        cmd = f.read()
    spss.Submit(cmd)                                    # execute the command
    if not mode:
        # the work files are kept when a mode (debug/test) was given
        os.remove(dataout + ".imp")
        os.remove(dataout + ".out")