def mod_database(input_scale, prefix, start, stop):
    vars_in_file = spssaux.VariableDict().Variables
    df_dict = {}
    for i in range(start, stop):
        df = input_scale.reset_index()
        df['time'] = prefix + str(i)
        for col in gb.columns_to_use:
            df[col] = prefix + str(i) + df[col]
        df_dict[i] = df
    db = pd.concat(df_dict, ignore_index=True)
    db2 = db[db['items'].isin(vars_in_file)]
    spss.Submit(recode(db2))
    rename_db = db2.loc[db2['rename'].notnull(), ['items', 'rename']]
    for i, row in rename_db.iterrows():
        orgname = row['items']
        new_name = row['rename']
        spss.Submit('RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'.format(
            orgname=orgname, new_name=new_name))
    # refresh the variable list after the renames before reporting misses
    vars_in_file = spssaux.VariableDict().Variables
    diff_vars = db['items'][~db['items'].isin(vars_in_file)].values.tolist()
    vars_not_in_file = 'Variables not in file\n' + '\n'.join(diff_vars) + '\n'
    print(str(len(diff_vars)) +
          ' variables not in file. Check log.txt for specification\n')
    db = db[db['items'].isin(vars_in_file)]
    return db, vars_not_in_file
def get_median_cut():
    cmd = ''
    cmd2 = ''
    input_dep = db_glob[(db_glob['dependent'] == 1) & db_glob[version].notnull()]
    for dep in input_dep[version].unique():
        db = input_dep['items'][input_dep[version] == dep]
        for time in range(start, stop):
            list_of_dep_items = [prefix + str(time) + var for var in db]
            cmd += 'COMPUTE {var}=mean.1({var_list}).\n'.format(
                var=prefix + str(time) + dep,
                var_list=','.join(list_of_dep_items))
    cmd += 'EXECUTE.\n'
    #print(cmd)
    spss.Submit(cmd)
    list_of_dep = [prefix + str(start) + dep for dep in input_dep[version].unique()]
    tag, err = spssaux.CreateXMLOutput(
        """FREQUENCIES VARIABLES={vars}
  /STATISTICS=MEDIAN.\n""".format(vars=' '.join(list_of_dep)))
    spss.GetXmlUtf16(tag, folder + '/fest.xml')
    context = "/outputTree"
    xpath = ("//category[@text='Median']/dimension[@text='Variables']"
             "/category[@variable='true']//@*[name()='number' or name()='varName']")
    median_list = spss.EvaluateXPath(tag, context, xpath)
    spss.DeleteXPathHandle(tag)
    # EvaluateXPath returns alternating varName/median pairs; expand them across
    # all time points, stripping the time prefix (assumed 4 characters) from
    # each variable name.
    median_list = [[prefix + str(i) + median_list[n][4:], median_list[n + 1]]
                   for i in range(start, stop)
                   for n in range(0, len(median_list), 2)]
    for sublist in median_list:
        cmd2 += ('RECODE {var} (Lowest thru {mean}=0) '
                 '(sysmis,77,88,99,0=sysmis) (else=1) into {var}_cut.\n').format(
                     var=sublist[0], mean=sublist[1])
    cmd2 += 'EXECUTE.\n'
    #print(cmd2)
    spss.Submit(cmd2)
    return list_of_dep
def synthesize(name=None, dir=None, mode=None):
    proc = "synthesize"
    args = srclib.arguments(name, dir, mode, proc)  # get the arguments
    if not args:
        return 1
    path = args[0]
    datain = args[1]
    dataout = args[2]
    rc = srclib.metadata(datain, path)  # get the metadata
    if rc != 0:
        return rc
    msg = None
    rc = srclib.execute("iveset", path, mode, None)  # execute iveset
    if rc != 0:
        msg = "Abnormal termination of iveset"
    else:
        if not os.path.exists(path + ".inp"):  # get the data
            msg = "Missing " + path + ".inp file"
        else:
            f = open(path + ".inp", "r")
            cmd = f.read()
            f.close()
            spss.Submit(cmd)
            if not mode:
                os.remove(path + ".inp")
            rc = srclib.execute("impute", path, mode, None)  # execute impute
            if rc != 0:
                msg = "Abnormal termination of synthesize"
            else:
                if dataout:  # output the synthesized data
                    rc = srclib.execute("putdata", path, mode, None)  # execute putdata
                    if rc != 0:
                        msg = "Abnormal termination of putdata"
                    else:
                        if not os.path.exists(dataout + ".out"):  # write the synthesized data
                            msg = "Missing " + dataout + ".out file"
                        else:
                            f = open(dataout + ".out", "r")
                            cmd = f.read()
                            f.close()
                            spss.Submit(cmd)  # execute the command
                            if not mode:
                                os.remove(dataout + ".imp")
                                os.remove(dataout + ".out")
    if msg:
        if os.path.exists(path + ".log"):  # copy the log
            print()
            f = open(path + ".log", "r")
            print(f.read())
            f.close()
        print(msg)  # print the error message
    else:
        if not os.path.exists(path + ".lst"):  # copy the listing
            msg = "Missing " + path + ".lst file"
            print(msg)
        else:
            print()
            f = open(path + ".lst", "r")
            print(f.read())
            f.close()
def OpenExcelFile(self, existingfile, ext, sheet, readnames,
                  assumedstrwidth, datasetname):
    spss.Submit(r"""get data
  /type=%s
  /file="%s"
  /sheet=name "%s"
  /readnames=%s
  /assumedstrwidth=%s.""" % (ext[1:], existingfile, sheet, readnames, assumedstrwidth))
    if datasetname:
        spss.Submit("dataset name %s." % datasetname)
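# A hedged usage sketch for OpenExcelFile. The file path, sheet name, and
# dataset name are hypothetical, and `opener` stands in for an instance of the
# enclosing class; argument values follow the GET DATA syntax built above.
def _open_excel_demo(opener):
    opener.OpenExcelFile("C:/data/survey.xlsx", ".xlsx", "Sheet1",
                         "on", 32767, "survey")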
def dolabels(variables=None, varpattern=None, lblvars=None, lblpattern=None,
             execute=True, varsperpass=20, syntax=None):
    """Execute STATS VALLBLS FROMDATA"""

    # debugging
    # makes debug apply only to the current thread
    #try:
        #import wingdbstub
        #if wingdbstub.debugger != None:
            #import time
            #wingdbstub.debugger.StopDebug()
            #time.sleep(1)
            #wingdbstub.debugger.StartDebug()
        #import thread
        #wingdbstub.debugger.SetDebugThreads({thread.get_ident(): 1}, default_policy=0)
        ## for V19 use
        ###SpssClient._heartBeat(False)
    #except:
        #pass

    try:
        vardict = spssaux.VariableDict(caseless=True)
    except:
        raise ValueError(_("""This command requires a newer version of the spssaux module.
It can be obtained from the SPSS Community website (www.ibm.com/developerworks/spssdevcentral)"""))
    varstolabel = resolve(vardict, _("variables to label"), variables,
                          varpattern, stringonly=False)
    labelvars = resolve(vardict, _("label variables"), lblvars,
                        lblpattern, stringonly=True)
    if len(varstolabel) == 0 or len(labelvars) == 0:
        raise ValueError(_("""No variables to label or no labelling variables were specified.
If a pattern was used, it may not have matched any variables."""))
    if len(labelvars) > 1 and len(labelvars) != len(varstolabel):
        raise ValueError(_("The number of label variables is different from the number of variables to label"))
    if min([vardict[item].VariableType for item in labelvars]) == 0:
        raise ValueError(_("""The label variables must all have type string"""))
    dsname = spss.ActiveDataset()
    if dsname == "*":
        raise ValueError(_("""The active dataset must have a dataset name in order to use this procedure"""))
    if syntax:
        syntax = syntax.replace("\\", "/")
        syntax = FileHandles().resolve(syntax)

    mkvl = Mkvls(varstolabel, labelvars, varsperpass, execute, syntax, vardict)
    for i in range(0, len(varstolabel), varsperpass):
        spss.Submit("""DATASET ACTIVATE %s""" % dsname)
        mkvl.doaggr(i)
    spss.Submit("""DATASET ACTIVATE %s""" % dsname)
    labelsyntax = mkvl.dolabels()
    if labelsyntax and execute:
        spss.Submit(labelsyntax)
    mkvl.report(labelsyntax)
    if labelsyntax and syntax:
        writesyntax(labelsyntax, syntax, mkvl)
def main():
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    # mod_database returns (dataframe, report text); only the dataframe is used here
    db_prefixed, vars_not_in_file = mod_database(db_glob, prefix, start, stop)
    #print(recode_cut(db_prefixed))
    spss.Submit(recode_cut(db_prefixed))
    #print(make_long_exp_vars())
    spss.Submit(make_long_exp_vars())
    spss.Submit("SAVE OUTFILE='%s'\n/COMPRESSED." % save_data)
def bbdesign(name=None, dir=None, mode=None):
    proc = "bbdesign"
    args = srclib.arguments(name, dir, mode, proc)  # get the arguments
    if not args:
        return 1
    path = args[0]
    datain = args[1]
    dataout = args[2]
    rc = srclib.metadata(datain, path)  # get the metadata
    if rc != 0:
        return rc
    msg = None
    args = ["/setup"]  # arguments for the bbdesign setup pass
    rc = srclib.execute("bbdesign", path, mode, args)  # execute bbdesign setup
    if rc != 0:
        msg = "Abnormal termination of bbdesign"
    else:
        if not os.path.exists(path + ".inp"):  # get the data
            msg = "Missing " + path + ".inp file"
        else:
            f = open(path + ".inp", "r")
            cmd = f.read()
            f.close()
            spss.Submit(cmd)
            if not mode:
                os.remove(path + ".inp")
            rc = srclib.execute("bbdesign", path, mode, None)  # execute bbdesign go
            if rc != 0:
                msg = "Abnormal termination of bbdesign"
            else:
                if os.path.exists(path + ".out"):  # write the samples
                    f = open(path + ".out", "r")
                    cmd = f.read()
                    f.close()
                    spss.Submit(cmd)  # execute the command
                    if not mode:
                        os.remove(path + ".out")
    if msg:
        if os.path.exists(path + ".log"):  # copy the log
            print()
            f = open(path + ".log", "r")
            print(f.read())
            f.close()
        print(msg)  # print the error message
    else:
        if not os.path.exists(path + ".lst"):  # copy the listing
            msg = "Missing " + path + ".lst file"
            print(msg)
        else:
            print()
            f = open(path + ".lst", "r")
            print(f.read())
            f.close()
def doaggr(self, doindex):
    """create an aggregate dataset and tally values

    doindex is the index into varstolabel at which to start"""

    vtl = self.varstolabel[doindex:doindex + self.varsperpass]
    vtllen = len(vtl)
    if len(self.labelvars) == 1:
        lbls = self.labelvars
        lastlbl = vtllen + 1
    else:
        lbls = self.labelvars[doindex:doindex + self.varsperpass]
        lastlbl = 3 * vtllen - 1
    brkvarlist = "\n".join(textwrap.wrap(" ".join(vtl), width=100))
    outvars = ["/min_%s=MIN(%s)/max_%s=MAX(%s)" % (mkrandomname(), v, mkrandomname(), v)
               for v in lbls]
    aggrcmd = Mkvls.aggrtemplate % (self.aggrdsname, self.aggrdsname, brkvarlist)\
        + "\n".join(outvars)
    spss.Submit(aggrcmd)
    spss.Submit("DATASET ACTIVATE %s" % self.aggrdsname)

    # for each variable, build label information based on data
    # AGGREGATE dataset structure:
    # var1value, var2value,..., min(text lbl1), max(text lbl1), min(text lbl2), max(text lbl2)...
    # but if only one label set, only one pair of label aggregates is produced
    # user missing values are exposed and subject to labelling
    curs = spssdata.Spssdata(names=False, convertUserMissing=False)
    for case in curs:
        for v, vname in enumerate(vtl):
            value = case[v]
            minlbl = self.truncate(case[min(vtllen + v * 2, lastlbl - 1)], 120).rstrip()
            maxlbl = self.truncate(case[min(vtllen + v * 2 + 1, lastlbl)], 120).rstrip()
            # more than one label for the same value?
            if minlbl != maxlbl and (minlbl != "" and minlbl is not None):
                self.conflicts[vname].add(value)
            # ignore empty or missing labels
            if maxlbl != "" and maxlbl is not None:
                # if the value has already been seen but with a different label, it's a conflict
                if value in self.values[vname] and not (value, maxlbl) in self.vlabels[vname]:
                    self.conflicts[vname].add(value)
                else:
                    self.vlabels[vname].add((value, maxlbl))  # first one wins
                self.values[vname].add(value)
                # tally instances where the same label is used for different values:
                # check whether this label has already been assigned to another value
                previousvalue = self.labelusage[vname].get(maxlbl, None)
                if previousvalue is not None and value != previousvalue:
                    ###self.duplabels[vname] = self.duplabels[vname] + 1
                    self.duplabels[vname].add(maxlbl)
                self.labelusage[vname][maxlbl] = value
    curs.CClose()
    spss.Submit("DATASET CLOSE %s" % self.aggrdsname)
def runscript(scriptname, params={}):
    """Construct a parameter dictionary and run a Python script.

    scriptname is the path to run.
    params is a Python dictionary of parameter names and values.
    The total size of the parameter dictionary is limited to 4K (after pickling).

    This function returns a dictionary of values set by the script via
    setreturnvalue. If the script sets no return value, the result is an
    empty dictionary."""

    fnparams = tempfile.gettempdir() + os.sep + "__SCRIPT__"
    fnreturn = tempfile.gettempdir() + os.sep + "__SCRIPTRETURN__"
    f = open(fnparams, "w+")
    # ensure file size is 4096 for *nix os's.
    f.write(1024 * "0000")
    f.flush()
    shmem = mmap.mmap(f.fileno(), 4096, access=mmap.ACCESS_WRITE)
    shmem.write(pickle.dumps(params))
    f.close()
    try:
        os.remove(fnreturn)  # ensure that no stale returns file exists
    except:
        pass
    ###import wingdbstub
    spss.Submit("SCRIPT " + spssaux._smartquote(scriptname))
    shmem.close()
    # The _SYNC command is required in order to ensure that the script has completed
    spss.Submit("_SYNC")
    # The parameter file will be removed by the script if it calls getscriptparam, but
    # the following code will clean up in case the script doesn't make that call.
    try:
        os.remove(fnparams)
    except:
        pass
    # get the return value, if any
    try:
        f = open(fnreturn, "r")
        shmem = mmap.mmap(f.fileno(), 4096, access=mmap.ACCESS_READ)
        ret = pickle.loads(shmem.read(4096))
        shmem.close()
        f.close()
        os.remove(fnreturn)
    except:
        ret = {}
    return ret
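# A minimal usage sketch for runscript; the script path and parameter names
# below are hypothetical. The invoked script would read the parameters with
# getscriptparam and report results back via setreturnvalue.
def _runscript_demo():
    result = runscript("C:/scripts/label_report.py",
                       params={"varname": "age", "verbose": True})
    print(result)  # empty dict if the script set no return value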
def getsav(self, filespec, delete=True):
    """Open sav file and return all contents

    filespec is the file path
    filespec is deleted after the contents are read unless delete==False"""

    item = self.wdsname
    spss.Submit(r"""get file="%(filespec)s".
DATASET NAME %(item)s.
DATASET ACTIVATE %(item)s.""" % locals())
    contents = spssdata.Spssdata(names=False).fetchall()
    spss.Submit("""DATASET CLOSE %(item)s.
NEW FILE.""" % locals())
    if delete:
        os.remove(filespec)
    return contents
def genData(self):
    """Generate variables holding all the dichotomies for values"""

    valcount = len(self.vvalues)
    computes = []
    # The VALUE function only works for numeric variables :-(
    # In ANY, all string values are considered valid.
    if self.string:
        setvars = ",".join(self.setvars)
    else:
        setvars = ",".join(["VALUE(%s)" % v for v in self.setvars])
    values = sorted(self.vvalues)
    varprefix = self.varprefix
    # if any generated variables already exist, they will be overwritten.
    # if they exist and are strings, the procedure will fail.
    for v in range(valcount):
        v1 = v + 1
        vname = "%(varprefix)s_%(v1)02d" % locals()
        self.generatednames.append(vname)
        val = values[v]
        vallabel = self.valuelabels.get(val, val)  # try to pick up a value label
        self.generatedvalues.append(val)
        self.generatedlabels.append(vallabel)
        if self.string:
            val = spssaux._smartquote("%s" % val)
        # the label must be quoted in VARIABLE LABEL syntax
        vallabel = spssaux._smartquote("%s" % vallabel)
        cmd = """COMPUTE %(vname)s = any(%(val)s, %(setvars)s).
VARIABLE LABEL %(vname)s %(vallabel)s.
VARIABLE LEVEL %(vname)s (NOMINAL).""" % locals()
        computes.append(cmd)
    spss.Submit(computes)
def list_of_cut_old(indep):
    # Capture the FREQUENCIES text output in a scratch file, then parse the
    # values column out of the pipe-delimited table.
    temp = sys.stdout
    sys.stdout = open('descript.txt', 'w')
    spss.Submit("""FREQUENCIES VARIABLES=%s
  /STATISTICS=MINIMUM MAXIMUM.""" % indep)
    sys.stdout.close()
    sys.stdout = temp
    valid_flag = 0
    cut_vals = []
    with open('descript.txt', 'r') as indata:
        for row in indata:
            if 'Valid' in row:
                valid_flag += 1
            if '__' not in row and valid_flag == 3:
                if 'Total' in row:
                    valid_flag += 1
                    break
                # the value is the third pipe-delimited column
                value = row.split('|')[2]
                value = value.replace(',', '.').replace(' ', '')
                cut_vals.append(value)
    return cut_vals
def rename_vars(vars_in_file):
    for i, row in rename.iterrows():
        orgname = row['old']
        new_name = row['new']
        if orgname in vars_in_file:
            spss.Submit(
                'RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'.format(
                    orgname=orgname, new_name=new_name))
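# rename_vars iterates over a module-level pandas DataFrame named `rename`
# with 'old' and 'new' columns. A hedged sketch of how that table might be
# set up before the call (variable names hypothetical):
def _rename_vars_demo():
    global rename
    rename = pd.DataFrame({'old': ['q1', 'q2'],
                           'new': ['anxiety1', 'anxiety2']})
    rename_vars(spssaux.VariableDict().variables)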
def dohistogram(finalweightvar):
    """Display unweighted histogram of weights

    finalweightvar is the generated weight variable"""

    title = _("Raked Weights Histogram before Applying New Weights")
    cmd = histtem % locals()
    spss.Submit(cmd)
def doactions(filespec=None, conflict="noname", currentactivedsn=None):
    """Execute command"""

    # debugging
    # makes debug apply only to the current thread
    try:
        import wingdbstub
        if wingdbstub.debugger != None:
            import time
            wingdbstub.debugger.StopDebug()
            time.sleep(1)
            wingdbstub.debugger.StartDebug()
        import _thread
        wingdbstub.debugger.SetDebugThreads({_thread.get_ident(): 1}, default_policy=0)
        # for V19 use
        ###SpssClient._heartBeat(False)
    except:
        pass

    if filespec is None and currentactivedsn is None:
        raise ValueError(_("No actions were specified for this command"))
    activeds = spss.ActiveDataset().lower()
    alldatasets = getAllDatasetNames()
    if currentactivedsn is not None:
        if currentactivedsn.lower() != activeds and \
           currentactivedsn.lower() in alldatasets:
            raise ValueError(_("""The dataset name to be assigned is already in use for another dataset: %s""")
                             % currentactivedsn)
        spss.Submit("""DATASET NAME %(currentactivedsn)s.
DATAFILE ATTRIBUTE ATTRIBUTE=%(customdsattr)s(%(currentactivedsn)s).""" % locals())
    if filespec is not None:
        # The unnamed active dataset might be empty, but we preserve it in case it isn't
        if activeds == "*":
            # assign a throwaway random name to keep the dataset open
            spss.Submit("""DATASET NAME %s.""" % ("D" + str(random.randint(100000, 999999))))
        spss.Submit("""GET FILE="%s". """ % filespec)
        thedsn = spss.GetDataFileAttributes(customdsattr)
        if len(thedsn) == 0:
            print(_("The data file does not contain a permanent dataset name. No session dataset name has been assigned."))
        else:
            if thedsn[0].lower() in alldatasets:
                if conflict != "override":
                    print(_("The permanent dataset name is already in use in this session. No session dataset name has been assigned."))
                    return
                print(_("The dataset name has been removed from an already open dataset: %s") % thedsn[0])
            spss.Submit("""DATASET NAME %s.""" % thedsn[0])
def list_of_cut(indep):
    context = '/outputTree'
    xpath = ('//pivotTable[@subType="Frequencies"]'
             '//group[@text="Valid"]//category/@number')
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    tag, err = spssaux.CreateXMLOutput('FREQUENCIES VARIABLES={}.'.format(indep))
    cut_list = spss.EvaluateXPath(tag, context, xpath)
    spss.DeleteXPathHandle(tag)
    return sorted(set(cut_list))
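# Hedged usage sketch: fetch the distinct valid values of one variable
# (variable name hypothetical) to serve as candidate cutpoints. The values
# come back from the XPath query as strings, hence the string sort.
def _list_of_cut_demo():
    cutoffs = list_of_cut("s_01sleep_single")
    print(cutoffs)  # e.g. ['1', '2', '3', '4', '5']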
def __enter__(self):
    """initialization for with statement"""
    try:
        spss.StartDataStep()
    except:
        # pending transformations block StartDataStep; run them first
        spss.Submit("EXECUTE.")
        spss.StartDataStep()
    return self
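# A hedged sketch of the intended use of this context manager, matching the
# `with DataStep():` call that appears in addinfo further down. It assumes a
# matching __exit__ that ends the data step (e.g. via spss.EndDataStep()).
def _datastep_demo():
    with DataStep():
        print(spss.GetVariableCount())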
def doheatmap(variables, yvar, xvar, paneldownvar, panelacrossvar,
              finalweightvar, autoheatmap):
    """produce a heatmap of the weights with 2 to 4 variables if requested

    variables is the set of variables with control totals
    yvar and xvar are the main variables for the heatmap (as lists)
    paneldownvar and panelacrossvar are variables for paneling down and across
    finalweightvar is the output weight variable
    autoheatmap specifies that the first two to four control variables define
    the heatmap dimensions"""

    # autoheatmap is used to get around dialog box limitations and overrides other
    # related variable specifications without a warning
    if len(variables) == 1:
        return  # No heatmap available
    if autoheatmap:
        yvar = [variables[0]]
        xvar = [variables[1]]
        hmsize = min(autoheatmap, len(variables), 4)
        if hmsize >= 3:
            paneldownvar = [variables[2]]
        else:
            paneldownvar = None
        if hmsize >= 4:
            panelacrossvar = [variables[3]]
        else:
            panelacrossvar = None
    plotvars = (yvar, xvar, paneldownvar, panelacrossvar)
    if not any(plotvars):
        return  # no plot requested
    if not all([yvar, xvar]):
        print(_("No heatmap produced: both y and x variables must be specified"))
        return
    if set(v[0] for v in plotvars if v is not None) - set(variables):
        print(_("No heatmap produced: only raking variables can be specified"))
        return
    if not paneldownvar:
        panelingdownvars = ""
        panelingdowntem = ""
    else:
        panelingdownvar = paneldownvar[0]
        panelingdownvars = """%(panelingdownvar)s[LEVEL=nominal]""" % locals()
        panelingdowntem = """ "Panel down"="%(panelingdownvar)s"[DATASET="graphdataset"]""" % locals()
    if not panelacrossvar:
        panelingacrossvars = ""
        panelingacrosstem = ""
    else:
        panelingacrossvar = panelacrossvar[0]
        panelingacrossvars = """%(panelingacrossvar)s[LEVEL=nominal]""" % locals()
        panelingacrosstem = """ "Panel across"="%(panelingacrossvar)s"[DATASET="graphdataset"]""" % locals()
    title = _("Unweighted Heatmap of Weights by Raking Variables")
    label = _("Weight Heatmap")
    yvar = yvar[0]
    xvar = xvar[0]
    cmd = hmtemplate % locals()
    spss.Submit(cmd)
def doblock(blocknum, atstart, atend, errorprint, errorcont, errorcall, syntax):
    """Execute block of syntax

    blocknum is the condition number
    atstart and atend are text to be displayed before and after
    errorprint is what to display on a syntax error
    errorcont is whether to continue running lines or stop
    syntax is a list of syntax lines to execute."""

    if not atstart is None:
        print(atstart.replace(")BLOCK", str(blocknum + 1)))
    lastline = len(syntax) - 1
    if lastline < 0:
        raise ValueError(_("""A syntax command block contains no syntax"""))

    # Submit each command one by one and handle error conditions
    cmd = []
    inmatrix = False
    for linenum, line in enumerate(syntax):
        cmd.append(line)
        # block or pseudo-block commands have to be submitted in a single call
        testline = line.rstrip().lower()
        if testline in ["matrix", "matrix."]:
            inmatrix = True
        dosubmit = not inmatrix and (linenum == lastline or (testline.endswith(".")
            and (syntax[linenum + 1].lower().strip() not in ["begin gpl", "begin gpl."])))
        if testline == "end matrix.":
            inmatrix = False
            dosubmit = True
        if dosubmit:
            try:
                spss.Submit(cmd)
                cmd = []
            except:
                if not errorprint is None:
                    print(errorprint.replace(")BLOCK", str(blocknum + 1)))
                if not errorcall is None:
                    # an error function can take control on error.
                    # It can return "stop" or "continue" to override the action specified in STATS IF
                    action = errorcall(blocknum + 1, cmd,
                                       spss.GetLastErrorLevel(), spss.GetLastErrorMessage())
                    if action == "stop":
                        break
                    elif action == "continue":
                        cmd = []
                        continue
                if not errorcont:
                    break
                cmd = []
    if not atend is None:
        print(atend.replace(")BLOCK", str(blocknum + 1)))
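# A hedged sketch of driving doblock directly: run two commands with start and
# end markers and stop at the first error. )BLOCK in the marker strings is
# replaced by the block number, as the function does above.
def _doblock_demo():
    doblock(0,
            atstart="Starting block )BLOCK",
            atend="Finished block )BLOCK",
            errorprint="Error in block )BLOCK",
            errorcont=False,
            errorcall=None,
            syntax=["DESCRIPTIVES VARIABLES=ALL.", "EXECUTE."])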
def main():
    os.chdir(imputations)
    spssaux.OpenDataFile(gb.select_rev)
    cmd = 'DATASET NAME orginal.\n'
    cmd += 'cd "{cwd}".\n'.format(cwd=imputations)
    spss.SetOutput("off")
    cmd += fix_value_lables()
    with open(imputation_syntax, 'w') as out:
        out.write(cmd)
    spss.Submit(cmd)
    run_cmd = impute_item_for_item()
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + run_cmd)
    spss.Submit(run_cmd)
    match_files()
    cmd = recode()
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + cmd)
    # submit the recode syntax as well, matching the write-then-submit
    # pattern used for every other command batch above
    spss.Submit(cmd)
    spss.Submit("""SAVE OUTFILE = '{imputed_data}'
  /COMPRESSED.\n""".format(imputed_data=gb.imputed_data))
def get_hostname(lines):
    m = re.search(r'\[Contacting .* host "(.*)".*\]', "\n".join(lines))
    if m:
        hostname = m.group(1)
        if hostname.lower() == "no-net":
            msg = "This program is intended for concurrent licenses only"
            issue_warning(msg)
            spss.Submit("show license.")
        return hostname
    else:
        raise ValueError("Hostname not found")
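# A hedged sketch of the input get_hostname expects: license-manager output
# containing a bracketed "Contacting ... host" line (exact wording assumed).
def _get_hostname_demo():
    lines = ['[Contacting concurrent license host "lic01.example.com".]']
    print(get_hostname(lines))  # lic01.example.com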
def setgen(self):
    """construct a new MR set of the appropriate type"""

    cmd = """MRSETS
/MDGROUP NAME=%(outputname)s
  LABEL="%(label)s"
  VARIABLES = %(variables)s
  VALUE=1
/DISPLAY NAME=[%(outputname)s]."""
    outputname = self.setname
    label = self.mrsetinfo.getSetLabel(self.mcset)
    variables = " ".join(self.generatednames)
    spss.Submit(cmd % locals())
def __init__(self):
    try:
        spss.StartDataStep()
    except:
        spss.Submit("EXECUTE.")
        spss.StartDataStep()
    self.ds = spss.Dataset()
    self.varlist = self.ds.varlist
    self.mrsets = {}
    # the api always returns the set name in upper case
    for name, theset in self.ds.multiResponseSet.data.items():
        self.mrsets[name.upper()] = theset
def addinfo(filespec):
    """open the file if appropriate type, extract variable information,
    and add it to dataset dsname.

    filespec is the file to open
    dsname is the dataset name to append to
    filetypes is the list of file types to include."""

    fnsplit = os.path.split(filespec)[1]
    fn, ext = os.path.splitext(fnsplit)
    for ft in filetypes:
        if ext in ftdict[ft]:
            if pat is None or pat.match(fn):
                try:
                    spss.Submit(spsscmd[ft] % filespec)
                    spss.Submit("DATASET NAME @__GATHERMD__.")
                except:
                    if not isinstance(filespec, str):
                        filespec = str(filespec, encoding)
                    raise EnvironmentError(_("File could not be opened, skipping: %s") % filespec)
                break
    else:
        # no matching file type: nothing to do
        return

    with DataStep():
        ds = spss.Dataset(name=dsname)     # not the active dataset
        dssource = spss.Dataset(name="*")  # The dataset to examine
        numvars = spss.GetVariableCount()  # active dataset
        variables = dssource.varlist
        for v in range(numvars):
            lis = [filespec.replace("\\", "/"),
                   spss.GetVariableName(v),
                   spss.GetVariableLabel(v)]
            lis.extend(blanks)
            lis = [item + 256 * " " for item in lis]
            ds.cases.append(lis)
            #ds.cases.append([filespec.replace("\\","/"), spss.GetVariableName(v), spss.GetVariableLabel(v), *blanks])
            if includeAttrs:
                attrs = variables[v].attributes.data
                for a in attrs:
                    if a.lower() in attrindexes:
                        # allow for standard variables
                        ds.cases[-1, attrindexes[a.lower()] + 3] = attrs[a][0] + attrlength * " "
    spss.Submit("DATASET CLOSE @__GATHERMD__.")
def execute(self, commands):
    transformedCommands = ['* Encoding: UTF-8.']
    for command in commands:
        # command = command.replace("\n", " ")
        if len(command) >= 1 and command[-1] != '.':
            # append the command terminator if it is missing
            command += '.'
        transformedCommands.append(command)
    # Execute all commands as one batch; submitting the whole list in a single
    # call allows block constructs such as MATRIX. [...] END MATRIX. to run as well.
    spss.Submit(transformedCommands)
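# Hedged sketch: because execute() submits the whole list in one Submit call,
# block constructs pass through intact. `runner` stands in for an instance of
# the enclosing class; the terminator periods are appended automatically.
def _execute_demo(runner):
    runner.execute([
        "MATRIX",
        "COMPUTE x = {1, 2; 3, 4}",
        "PRINT x",
        "END MATRIX",
    ])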
def add_suffix(folder):
    for item in ["/no_strings", "/strings"]:
        datafiles = get_filelist(folder + item, 'sav')
        for file in datafiles:
            print(file)
            exclude = ['kod_id']  # name of the id variable
            spssaux.OpenDataFile(file)
            basename = os.path.splitext(os.path.basename(file))[0]
            suffix = basename  # desired suffix
            print(basename)
            vars = spssaux.VariableDict().variables
            for i in exclude:
                if i in vars:
                    vars.remove(i)
            oldnames = spssaux.VariableDict().expand(vars)
            newnames = [varnam + "_" + suffix for varnam in oldnames]
            spss.Submit('rename variables (%s=%s).'
                        % ('\n'.join(oldnames), '\n'.join(newnames)))
            spss.Submit("""
SAVE OUTFILE = "%s%s".
DATASET CLOSE ALL.
NEW FILE.
""" % (folder + item + '/suffix/', basename + '.sav'))
def match_files():
    cmd1 = ''
    cmd2 = ''
    A = funcs.get_filelist(imputations, 'sav')
    f = lambda A, n=30: [A[i:i + n] for i in range(0, len(A), n)]
    cmd1 += '\n'.join([
        'MATCH FILES\n'
        + '\n'.join([' /FILE="%s"' % fil for fil in dl])
        + '\n /BY Imputation_ %s.\n' % gb.id
        + 'SAVE OUTFILE="part_%s.sav"\n/COMPRESSED.\n' % f(A).index(dl)
        for dl in f(A)
    ])
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + cmd1)
    spss.Submit(cmd1)
    B = [x for x in funcs.get_filelist(imputations, 'sav') if 'part_' in x]
    cmd2 += '\n'.join([
        'MATCH FILES\n'
        + '\n'.join([' /FILE="%s"' % fil for fil in B])
        + '\n /BY Imputation_ %s.\n' % gb.id
        + 'SAVE OUTFILE="COMPLETE.sav"\n/COMPRESSED.\n'
    ])
    cmd2 += 'DATASET CLOSE ALL.\n'
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + cmd2)
    spss.Submit(cmd2)
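# The chunking lambda in match_files splits the file list into groups of 30
# because MATCH FILES accepts only a limited number of /FILE subcommands per
# command. A standalone demonstration of the helper (no SPSS required):
def _chunks_demo():
    f = lambda A, n=30: [A[i:i + n] for i in range(0, len(A), n)]
    assert f(list(range(7)), n=3) == [[0, 1, 2], [3, 4, 5], [6]]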
def createHandle(self, handle, spec, encoding=None):
    """Create a file handle and update the handle list accordingly

    handle is the name of the handle
    spec is the location specification, i.e., the /NAME value
    encoding optionally specifies the encoding according to the valid values
    in the FILE HANDLE syntax."""

    spec = re.sub(r"[\\/]", re.escape(os.path.sep), spec)  # clean up path separator
    cmd = """FILE HANDLE %(handle)s /NAME="%(spec)s" """ % locals()
    # Note the use of double quotes around the encoding name as there are some encodings that
    # contain a single quote in the name
    if encoding:
        cmd += ' /ENCODING="' + encoding + '"'
    spss.Submit(cmd)
    self.fhdict[handle.lower()] = (spec, encoding)
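# A hedged usage sketch for createHandle; the handle name and path are
# hypothetical, and `fh` stands in for a FileHandles instance. Once defined,
# the handle can replace the directory in any file specification.
def _create_handle_demo(fh):
    fh.createHandle("projdata", r"C:\studies\wave1", encoding="UTF-8")
    spss.Submit('GET FILE="projdata/baseline.sav".')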
def search(name=None, dir=None, mode=None):
    proc = "search"
    args = srclib.arguments(name, dir, mode, proc)  # get the arguments
    if not args:
        return 1
    path = args[0]
    datain = args[1]
    dataout = args[2]
    rc = srclib.metadata(datain, path)  # get the metadata
    if rc != 0:
        return rc
    msg = None
    rc = srclib.execute("srchset", path, mode, None)  # execute srchset
    if rc != 0:
        msg = "Abnormal termination of srchset"
    else:
        if not os.path.exists(path + ".inp"):  # get the data
            msg = "Missing " + path + ".inp file"
        else:
            f = open(path + ".inp", "r")
            cmd = f.read()
            f.close()
            spss.Submit(cmd)
            if not mode:
                os.remove(path + ".inp")
            rc = srclib.execute("search", path, mode, None)  # execute search
            if rc != 0:
                msg = "Abnormal termination of search"
            else:
                if dataout:
                    srclib.residuals(datain, dataout, path, mode)  # output the residuals
    if msg:
        if os.path.exists(path + ".log"):  # copy the log
            print()
            f = open(path + ".log", "r")
            print(f.read())
            f.close()
        print(msg)  # print the error message
    else:
        if not os.path.exists(path + ".lst"):  # copy the listing
            msg = "Missing " + path + ".lst file"
            print(msg)
        else:
            print()
            f = open(path + ".lst", "r")
            print(f.read())
            f.close()
def calculate_odds(db_prefixed, start, stop, prefix):
    cmd_log_reg = ''
    db_list = []
    db_err_list = []
    #input_indep = db_glob.query('dependent != 1')
    input_dep = db_glob.query('dependent == 1')
    dep_list = get_median_cut()
    n = 0
    for time in range(start, stop):
        # build the dependent-variable names for this time point; the first
        # four characters of each name hold the time prefix being replaced
        dep_list_time = list(set(prefix + str(time) + item[4:] for item in dep_list))
        input_indep = db_prefixed.query(
            'dependent != 1 and time == "%s%s"' % (prefix, str(time)))
        for typ in columns_to_use:
            if typ == version:
                suffix = '_middle'
            if typ == single_items:
                suffix = '_single'
            for indep in input_indep[typ].unique():
                if str(indep) != 'nan':
                    indep = indep + suffix
                    cutoffs = list_of_cut(indep)
                    for dep in dep_list_time:
                        for x in range(len(cutoffs) - 1):
                            if n >= error_search:
                                #cmd_log_reg += recode_cut(indep,cutoffs,x)
                                #cmd_log_reg += log_reg_cmd(dep,indep)
                                spss.Submit(recode_cut(indep, cutoffs, x))
                                db, db_err = log_reg(dep, indep, cutoffs[x + 1], n)
                                db_list.append(db)
                                db_err_list.append(db_err)
                                # spss.Submit(odds(dep, indep))
                                #db_list.append(log_reg(dep,indep,cutoffs[x+1],n))
                            n += 2
                    #db_list.append(find_value(indep,dep,prefix+str(time)))
    db = pd.concat(db_list)
    db_err = pd.concat(db_err_list)
    #print(cmd_log_reg)
    return db, db_err