def __init__(self, outputDoc=None, selected=False):
    """Bind the instance to a Viewer output document and cache its items.

    outputDoc -- the output document to work on; when it is omitted (or
                 falsy) the designated output document is requested from
                 SpssClient instead.
    selected  -- stored unchanged on the instance.
    """
    # Fall back to the designated Viewer document when none was supplied.
    self.outputDoc = outputDoc or SpssClient.GetDesignatedOutputDoc()
    self.items = self.outputDoc.GetOutputItems()
    self.__selected = selected
    self.__itemType = []
def __enter__(self):
    """Enter the with block, starting the SpssClient connection if needed.

    ClientSession.count holds the number of sessions currently open.  The
    client is started only when that count is zero; the count is then
    incremented on every successful entry.

    Returns self.
    Raises RuntimeError if SpssClient.StartClient fails.
    """
    if ClientSession.count == 0:
        try:
            SpssClient.StartClient()
        except Exception:
            # Only genuine errors are translated; KeyboardInterrupt and
            # SystemExit propagate unchanged.
            raise RuntimeError(_("SpssClient.StartClient failed."))
    ClientSession.count += 1
    return self
def spssCwd(fh=None, theType=None, cd=None, macro=None):
    """Get the folder of the designated syntax file or the active data file.

    The folder is published to SPSS as a FILE HANDLE and, optionally, as the
    working directory and as a macro.

    fh      -- name of the file handle (and of the macro); default "syntaxdir"
    theType -- "syntax" or None to use the designated syntax document,
               "data" to use the active data document
    cd      -- when true or None, a CD command to the folder is added
    macro   -- when true or None, a macro !<fh> expanding to the folder is
               defined

    Returns the folder.  If the document has no path yet (not saved), the
    absolute current directory is used and a warning is printed.  If the
    resulting folder is an empty string, no commands are submitted.
    """
    if fh is None:
        fh = "syntaxdir"
    wantsSyntax = theType == "syntax" or theType is None

    SpssClient.StartClient()
    path = "."
    try:
        # Fetch the document once: a second lookup is another client round
        # trip and may return None if the document went away in between.
        if wantsSyntax:
            doc = SpssClient.GetDesignatedSyntaxDoc()
        elif theType == "data":
            doc = SpssClient.GetActiveDataDoc()
        else:
            doc = None
        if doc is not None:
            path = doc.GetDocumentPath()
    finally:
        # NOTE(review): kept from the original -- confirm that Exit() and
        # not StopClient() is what is intended here.
        SpssClient.Exit()

    if not path:
        path = os.path.abspath(".")
        print("\nWarning # No path defined. This means that your %s has not been saved yet.\nUsing '%s'\n" %
              ("syntax file" if wantsSyntax else "data file", path))
    else:
        path = os.path.dirname(path)

    cmds = ["FILE HANDLE %s /NAME='%s'." % (fh, path)]
    if cd or cd is None:
        cmds.append("CD '%s'." % path)
    if macro or macro is None:
        cmds.append("DEFINE !%s () '%s' !ENDDEFINE." % (fh, path))
    if debug:
        print("\n".join(cmds))
    if path:
        spss.Submit(cmds)
    return path
def outputAttrs(header=None, footer=None, margins=None, orientation=None,
                pagenumber=None, itemspacing=None, chartsize=None):
    """Set printing properties on the designated Viewer document.

    header, footer -- sequences of text lines; empty lines are dropped and
                      the remaining ones are joined with <br>
    margins        -- four values (left, right, top, bottom, in points);
                      a negative value leaves that margin untouched
    orientation    -- "portrait" is sent as "1", any other value as "2"
    pagenumber     -- starting page number
    itemspacing    -- space between items
    chartsize      -- accepted but ignored: the feature was removed because
                      the underlying api does not work

    Raises ValueError when margins is given without exactly four values.
    The client connection is stopped in all cases.
    """
    if margins and len(margins) != 4:
        raise ValueError(
            _("""MARGINS must specify four values (left, right, top, bottom in points)"""))

    try:
        SpssClient.StartClient()
        # An empty Viewer window gets created if there is none.
        desout = SpssClient.GetDesignatedOutputDoc()

        # Headers and footers are always handled as html, so a <br> causes
        # a line break.
        for lines, setter in ((header, desout.SetHeaderText),
                              (footer, desout.SetFooterText)):
            if lines:
                setter("<br>".join([line for line in lines if line]))

        # page margins
        if margins:
            marginopts = (
                SpssClient.PrintOptions.LeftMargin,
                SpssClient.PrintOptions.RightMargin,
                SpssClient.PrintOptions.TopMargin,
                SpssClient.PrintOptions.BottomMargin,
            )
            for idx, opt in enumerate(marginopts):
                if margins[idx] >= 0:
                    desout.SetPrintOptions(opt, str(margins[idx]))

        # page orientation
        if orientation:
            code = "1" if orientation == "portrait" else "2"
            desout.SetPrintOptions(SpssClient.PrintOptions.Orientation, code)

        # starting page number
        if pagenumber:
            desout.SetPrintOptions(SpssClient.PrintOptions.StartingPageNumber,
                                   str(pagenumber))

        # inter-item spacing
        if itemspacing:
            desout.SetPrintOptions(SpssClient.PrintOptions.SpaceBetweenItems,
                                   str(itemspacing))

        # chart size: intentionally not applied (see docstring).
    finally:
        SpssClient.StopClient()
# This script is not intended to be directly run. As the python modules bundled with SPSS 24 are only compatible with # python 2.7 and 3.4 and our machines are set up with 3.6, this script must be run with the 3.4 python interpreter that # comes bundled with SPSS. import SpssClient import spssaux import spss import csv import re SpssClient.StartClient() # User defined variables input_file = r'user_input_1' output_file = r'user_input_2' output_var_vals_file = r'user_input_3' output_var_info_file = r'user_input_4' # Load sav-file. spssaux.OpenDataFile(input_file) # Count number of columns varCount = spss.GetVariableCount() caseCount = spss.GetCaseCount() print('There are %d variables in this file' % varCount) print('There are %d cases. Please check this matches number of cases in the output file' % spss.GetCaseCount()) # Clean file: only string columns for ind in range(varCount): varName = spss.GetVariableName(ind)
def modify(subtype, select=None, skiplog=True, process="preceding", dimension='columns',
           level=-1, hide=False, widths=None, rowlabels=None, rowlabelwidths=None,
           textstyle=None, textcolor=None, bgcolor=None, applyto="both", customfunction=None,
           printlabels=False, regexp=False, tlook=None, countinvis=True, sigcells=None,
           siglevels="both"):
    """Apply a hide or show action to specified columns or rows of the
    specified subtype, or resize columns.

    subtype is the OMS subtype of the tables to process or a sequence of
    subtypes.  White space, case and matching outer quotes are ignored;
    "*" matches every subtype.

    select is a sequence of column or row identifiers or ["ALL"].
    Identifiers can be positive or negative numbers, counting from 0.
    Negative numbers count from the end: -1 is the last row or column.
    Identifiers can also be the text of the lowest level in the column
    heading.  If the value is or can be converted to an integer, it is
    assumed to be a column number.  Numeric values are truncated to
    integers.  You cannot hide all the items even though this routine will
    try.

    process specifies "preceding" to process the output of the preceding
    command or "all" to process all tables having any of the specified
    subtypes.

    level defaults to the innermost level (-1).  Specify a more negative
    number to move out or up in the label array.  -2, for example, would be
    the next-to-innermost level.  When counting columns or rows, count at
    the innermost level regardless of the level setting.

    Hide cannot be combined with other actions.

    If skiplog is True and the last item in the Viewer is a log, the search
    starts with the preceding item.  It needs to be True if this function
    is run from the extension command and commands are echoing to the log.

    dimension == 'columns' indicates that columns should be operated on.
    dimension == 'rows' specifies rows.

    widths specifies the width or widths to be applied to the selected rows
    or columns.  If it is a single element, it is used for all specified
    columns.  Otherwise, it is a sequence of sizes in points of the same
    length as the select list.

    rowlabels and rowlabelwidths can be specified to set stub (row) widths.
    rowlabels can only contain numbers.

    textstyle, textcolor, and bgcolor apply formatting.  Colors are
    specified as three integers for RGB.  textstyle can be REGULAR, BOLD,
    ITALIC, or BOLDITALIC.

    applyto can be BOTH, DATACELLS, LABELS, or a Boolean Python expression
    in which x stands for the cell value.

    customfunction is a list of module.function names of functions to be
    called as cells are styled.

    printlabels, regexp, tlook, sigcells and siglevels are passed through
    to PtColumns.  When countinvis is false, set23 is called on each table
    before the action is applied.

    Raises ValueError if sigcells is given and v24ok is false
    (significance highlighting requires Statistics 24 or later).

    This function processes the latest item in the designated Viewer: all
    pivot tables for that instance of the procedure are processed according
    to the subtype specification.
    """

    # ensure localization function is defined
    # pot file must be named SPSSINC_MODIFY_TABLES.pot
    global _
    try:
        _("---")
    except Exception:
        def _(msg):
            return msg

    SpssClient.StartClient()
    try:
        info = NonProcPivotTable("INFORMATION", tabletitle=_("Information"))
        # The inner try guarantees that the information table is emitted;
        # the outer one guarantees that the client is stopped even when
        # creating or generating that table fails.
        try:
            c = PtColumns(select, dimension, level, hide, widths, rowlabels, rowlabelwidths,
                          textstyle, textcolor, bgcolor, applyto, customfunction,
                          printlabels, regexp, tlook, sigcells, siglevels)
            if sigcells is not None and not v24ok:
                raise ValueError(_("""Significance highlighting requires at least Statistics version 24"""))
            if not _isseq(subtype):
                subtype = [subtype]
            # remove white space
            subtype = ["".join(st.lower().split()) for st in subtype]
            # remove matching outer quotes of any type
            subtype = [re.sub(r"""^('|")(.*)\1$""", r"""\2""", st) for st in subtype]
            matchall = "*" in subtype
            if matchall:
                subtype = ["*"]

            items = SpssClient.GetDesignatedOutputDoc().GetOutputItems()
            itemcount = items.Size()
            # Skip a trailing log item (guarding against an empty item list).
            if (skiplog and itemcount > 0 and
                    items.GetItemAt(itemcount - 1).GetType() == SpssClient.OutputItemType.LOG):
                itemcount -= 1

            # Walk backwards from the most recent item.
            for itemnumber in range(itemcount - 1, -1, -1):
                item = items.GetItemAt(itemnumber)
                # In "preceding" mode stop at the first item at tree level 1
                # or above: it ends the output of the preceding command.
                if process == "preceding" and item.GetTreeLevel() <= 1:
                    break
                if item.GetType() != SpssClient.OutputItemType.PIVOT:
                    continue
                if matchall or "".join(item.GetSubType().lower().split()) in subtype:
                    c.thetable = item.GetSpecificType()
                    if not countinvis:
                        set23(c.thetable)
                    c.applyaction(c.thetable, info)
        finally:
            info.generate()
    finally:
        SpssClient.StopClient()
def execute(sFile, outDir):
    """Run the factor analysis for one input file and write the reports.

    sFile  -- path of the comma-delimited text file read by GET DATA
    outDir -- output root; the Viewer report (.spv) and the result csv go to
              <outDir>/report_files, the html report to <outDir>/html

    Steps:
      1. run the SPSS syntax (import, derived variables, FACTOR, save the
         Viewer document and export the dataset to csv);
      2. read six weights (rows 0-5, column 7) and their sum (row 5,
         column 8) from the 'Total Variance Explained' pivot table;
      3. append a weighted score, computed from csv columns 25-30, to every
         data row, sort the rows by score (highest first) and rewrite the
         result csv;
      4. build the html report.

    Rows whose factor columns are missing or not numeric get a score of 0;
    blank lines are skipped.  The client connection is always stopped.

    NOTE: the file handling uses Python 2 conventions (csv written in "wb"
    mode, encoded bytes written to a text-mode file).
    """
    initName = os.path.split(sFile)[1].split('.')[0]
    report_dir = os.path.join(outDir, 'report_files')
    reportFile = os.path.join(report_dir, initName + '_report.spv')
    print('reportFile is %r' % reportFile)
    resultFile = os.path.join(report_dir, initName + '_result.csv')
    if not os.path.exists(report_dir):
        os.makedirs(report_dir)

    try:
        sc.StartClient()
        print("begin to execute synx...")
        print("begin to get data from %s " % sFile)
        # One command per line; continuation lines are indented.
        sc.RunSyntax(r"""
GET DATA
  /TYPE=TXT
  /FILE="%s"
  /ENCODING='UTF8'
  /DELCASE=LINE
  /DELIMITERS=","
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
  /VARIABLES=
  province A200
  rptn F4.0
  rptm F2.0
  corpId F5.0
  corpName A43
  C1 F8.4
  C2 F8.4
  C3 F8.4
  C4 F10.4
  C5 F8.4
  C6 F8.4
  C7 F7.4
  C8 F7.4
  C9 F7.4
  C10 F7.4
  C11 F7.4
  C9_avg F7.4
  C10_avg F7.4
  C11_avg F7.4.
CACHE.
EXECUTE.
DATASET NAME dataset1 WINDOW=FRONT.
DATASET ACTIVATE dataset1.
COMPUTE t1=1/C1.
EXECUTE.
COMPUTE t7=1-C7.
EXECUTE.
COMPUTE t8=1-C8.
EXECUTE.
COMPUTE t9=1/abs(C9_avg-C9).
EXECUTE.
COMPUTE t10=1/abs(C10_avg-C10).
EXECUTE.
COMPUTE t11=1/abs(C11_avg-C11).
EXECUTE.
FACTOR
  /VARIABLES t1 C2 C3 C4 C5 C6 t7 t8 t9 t10 t11
  /MISSING LISTWISE
  /ANALYSIS t1 C2 C3 C4 C5 C6 t7 t8 t9 t10 t11
  /PRINT INITIAL CORRELATION KMO EXTRACTION ROTATION FSCORE
  /FORMAT BLANK(.10)
  /PLOT EIGEN ROTATION
  /CRITERIA FACTORS(6) ITERATE(25)
  /EXTRACTION PC
  /CRITERIA ITERATE(25)
  /ROTATION VARIMAX
  /SAVE REG(ALL)
  /METHOD=CORRELATION.
OUTPUT SAVE NAME=Document1
  OUTFILE='%s'
  LOCK=NO.
SAVE TRANSLATE OUTFILE='%s'
  /TYPE=CSV
  /ENCODING='UTF8'
  /MAP
  /REPLACE
  /FIELDNAMES
  /CELLS=VALUES.
""" % (sFile, reportFile, resultFile))
        print("exec synx complete !")

        # The dataset is exported with SAVE TRANSLATE above because saving
        # it to csv through the data document's SaveAs did not work.

        # Factor weights come from the 'Total Variance Explained' table.
        outDoc = sc.GetDesignatedOutputDoc()
        outputItems = outDoc.GetOutputItems()
        pivotTable = getPivotTable(outputItems, 'Total Variance Explained')
        dataArray = pivotTable.DataCellArray()
        weight_facts = [float(dataArray.GetValueAt(i, 7)) for i in range(6)]
        weight_sum = float(dataArray.GetValueAt(5, 8))
        print("each weight[0-5, 7] : ", weight_facts)
        print("sum: [5,8]", weight_sum)

        # Compute the score of every data row of the exported csv.
        req = re.compile(r'\t|,|:')
        print("data_file_name: %r" % resultFile)
        header = None
        rows = []
        with open(resultFile) as fr:
            for lineno, line in enumerate(fr):
                fields = req.split(line.strip())
                if lineno == 0:
                    fields.append('score')
                    header = fields
                    continue
                if fields == ['']:
                    continue  # blank line
                score = 0
                try:
                    # Columns 25-30 hold the six saved factor scores.
                    factors = [float(fields[col]) for col in range(25, 31)]
                    score = sum(f * w for f, w in zip(factors, weight_facts)) / weight_sum
                except (ValueError, IndexError):
                    # Non-numeric or missing factor columns: keep score 0.
                    pass
                fields.append(score)
                rows.append(fields)

        # Highest score first; the header is kept out of the sort so the
        # order does not depend on comparing a string with numbers.
        rows.sort(key=lambda fields: fields[-1], reverse=True)
        returnMat = ([header] if header is not None else []) + rows

        with open(resultFile, "wb") as f:
            csv.writer(f).writerows(returnMat)

        # The html file is named after the area mapped from the second
        # "_"-separated part of the input name; fall back to the input name
        # when that part or its mapping is missing.
        name_parts = initName.split('_')
        try:
            area_name = contant.area_map[name_parts[1]]
        except (IndexError, KeyError):
            area_name = None
        if area_name is None:
            area_name = initName
        htmlDir = os.path.join(outDir, 'html')
        htmlFile = os.path.join(htmlDir, area_name + r"_report.html")
        if not os.path.exists(htmlDir):
            os.makedirs(htmlDir)

        # generate html files
        html = u"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title></title>
<style type="text/css">
table.gridtable {
    font-family: verdana, arial, sans-serif;
    font-size: 11px;
    color: #333333;
    border-width: 1px;
    border-color: #666666;
    border-collapse: collapse;
}
table.gridtable th {
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
    background-color: #dedede;
}
table.gridtable td {
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
}
</style>
</head>
"""
        html += u"<body>"

        # the top 20 (the matching "last 20" table is currently disabled)
        html += convertMatrixToHtml('前20名典当行', returnMat[0:20])

        # rotated component matrix
        pivotTable = getPivotTable(outputItems, 'Rotated Component Matrix')
        rowLabels = [u'资产负债率', u'总资产增长率', u'典当资金周转率', u'总资产利润率',
                     u'净资产增长率', u'净资产收益率', u'期末逾期贷款率', u'绝当率',
                     u'单笔超注册资金25%贷款占比', u'房地产集中度', u'大额房地产余额占比']
        table = getRotatedMatrix(pivotTable, rowLabels, 12, 7)
        html += convertMatrixToHtml("旋转后的成分矩阵", table, True)

        # total variance explained
        pivotTable = getPivotTable(outputItems, 'Total Variance Explained')
        rowLabels = [u'组件1', u'组件2', u'组件3', u'组件4', u'组件5', u'组件6',
                     u'组件7', u'组件8', u'组件9', u'组件10', u'组件11']
        table = getRotatedMatrix(pivotTable, rowLabels, 12, 10)
        html += convertMatrixToHtml("总方差解释", table)

        # all rows
        html += convertMatrixToHtml('所有典当行排名', returnMat)
        html += u"</body>"
        html += u"</html>"

        # Open the file only once the content is complete, and always
        # close it.
        with open(htmlFile, 'w') as htmlF:
            htmlF.write(html.encode('utf-8'))
    finally:
        sc.StopClient()
def modify(select=None, command=None, subtype=None, process="preceding", visibility=True,
           outlinetitle=None, outlinetitlestart=None, outlinetitleregexp=None,
           outlinetitleend=None, itemtitle=None, itemtitlestart=None, itemtitleregexp=None,
           itemtitleend=None, repoutlinetitle=None, repitemtitle=None,
           repoutlinetitleregexp=None, repitemtitleregexp=None, sequencestart=None,
           sequencetype="", visible="asis", customfunction=None, ignore=None,
           breakbeforetitles=False, titlelevel="top", breakfirst=True):
    """Execute SPSSINC MODIFY OUTPUT command.

    See SPSSINC_MODIFY_OUTPUT.py for argument definitions.

    Items of the designated Viewer document are scanned (never the root
    item).  With process == "preceding" the scan starts at the last level-1
    item, ignoring log items.  Every item that passes the visibility
    criterion and the ItemFilter is handed to the ItemProcessor; then its
    visibility is changed ("true"/"false") or it is marked for deletion
    ("delete") according to visible, and finally the custom functions are
    called on it (not for items marked for deletion).

    The information table is generated even when processing fails.
    """
    # ensure localization function is defined.
    # po file must be named SPSSINC_MODIFY_OUTPUT.pot
    global _
    global desout
    try:
        _("---")
    except Exception:
        def _(msg):
            return msg

    # NOTE(review): the original looped over [select, command, subtype] and
    # assigned [] to the loop variable when it was None.  That rebinding had
    # no effect on the arguments, so they reach ItemFilter unchanged
    # (possibly None), exactly as before.  Confirm with ItemFilter before
    # turning this into a real default.

    info = NonProcPivotTable("INFORMATION", tabletitle=_("Information"))
    try:
        with ClientSession():
            desout = SpssClient.GetDesignatedOutputDoc()
            if customfunction is not None:
                customfunction = [resolvestr(f) for f in customfunction]
            items = desout.GetOutputItems()
            itemkt = items.Size()
            start = 1  # do not process the root
            if process == "preceding":
                # work back until level 1 item found, ignoring logs.
                for i in range(itemkt - 1, 0, -1):
                    item = items.GetItemAt(i)
                    if item.GetType() == SpssClient.OutputItemType.LOG:
                        itemkt -= 1
                        continue
                    if item.GetTreeLevel() == 1:
                        start = i
                        break

            itemfilter = ItemFilter(select, command, subtype, outlinetitle,
                                    outlinetitlestart, outlinetitleregexp, outlinetitleend,
                                    itemtitle, itemtitlestart, itemtitleregexp, itemtitleend,
                                    breakbeforetitles, info)
            processor = ItemProcessor(repoutlinetitle, repitemtitle, repoutlinetitleregexp,
                                      repitemtitleregexp, itemfilter.outlinetitleregexp,
                                      itemfilter.itemtitleregexp, sequencestart, sequencetype,
                                      breakbeforetitles, titlelevel, breakfirst)
            if visible == "delete":
                desout.ClearSelection()

            for itemnumber in range(start, itemkt):
                item = items.GetItemAt(itemnumber)
                isVisible = item.IsVisible()
                # visibility criterion can be "true", "false", or "all"
                wanted = ((isVisible and visibility != "false") or
                          (not isVisible and visibility != "true"))
                if not wanted or not itemfilter.filter(item):
                    continue
                processor.apply(item)
                if visible == "true":
                    item.SetVisible(True)
                elif visible == "false":
                    item.SetVisible(False)
                elif visible == "delete":
                    # Marked for deletion: custom functions are not run.
                    item.SetSelected(True)
                    continue
                if customfunction is not None:
                    for f in customfunction:
                        f(item)

            if visible == "delete":
                # items to be deleted are (the only ones) selected
                desout.Delete()
    finally:
        info.generate()
def __exit__(self, type, value, tb):
    """Leave the with block and stop the client when no session remains.

    ClientSession.count is decremented; once it is zero or less the
    SpssClient connection is stopped.  Returns False, so an exception
    raised inside the with block is not suppressed.
    """
    remaining = ClientSession.count - 1
    ClientSession.count = remaining
    if remaining <= 0:
        SpssClient.StopClient()
    return False
# This python3 script is to be used with SPSS # to reformat output from SPSS multiple regression into a more readable table import SpssClient SpssClient.StartClient() oDoc = SpssClient.GetDesignatedOutputDoc() #Access active output window oItems = oDoc.GetOutputItems() #Look up all items in output window for index in range(oItems.Size()): #Loop through indices of output items oItem = oItems.GetItemAt(index) #Access each output item if oItem.GetType() == SpssClient.OutputItemType.PIVOT: #Continue only if output item is pivot table pTable = oItem.GetSpecificType() #Access pivot table datacells = pTable.DataCellArray() PivotMgr = pTable.PivotManager() ColLabels = pTable.ColumnLabelArray() #pull information from Model Summary Table and ANOVA Table (for footnote) and from Coefficients Table if pTable.GetTitleText() == 'Model Summary': modelsuminfo = {} #keys will be the names of the stats we need #'R Square' and 'Adjusted R Square #values will be the corresponding value of each statistic for i in range(1,ColLabels.GetNumRows()):#iterating through all rows and columns of the Column Labels for j in range(ColLabels.GetNumColumns()): label = ColLabels.GetValueAt(i,j) if label in ['R Square', 'Adjusted R Square']:#the stats we are interested in lastrowindex = datacells.GetNumRows() - 1 #in this case there's only one row of data #but in diff regression models there can be multiple rows, and last row #contains the final values we want valuetoadd= float(datacells.GetUnformattedValueAt (lastrowindex, j)) #the value of R Square or Adjusted R Square valuetoadd = "{0:.2f}".format(valuetoadd)
#/***********************************************************************
# * IBM Confidential
# *
# * OCO Source Materials
# *
# * IBM SPSS Products: Statistics Common
# *
# * (C) Copyright IBM Corp. 1989, 2011
# *
# * The source code for this program is not published or otherwise divested of its trade secrets,
# * irrespective of what has been deposited with the U.S. Copyright Office.
# ************************************************************************/

# Transpose the rows and columns of a pivot table.  The table is the item
# supplied by the script context when there is one, otherwise the current
# item of the designated output document.  Items that are not pivot tables
# are left untouched.
import SpssClient

SpssClient.StartClient()
try:
    objSpssScriptContext = SpssClient.GetScriptContext()
    if objSpssScriptContext:
        objSpssOutputItem = objSpssScriptContext.GetOutputItem()
    else:
        objSpssOutputDoc = SpssClient.GetDesignatedOutputDoc()
        objSpssOutputItem = objSpssOutputDoc.GetCurrentItem()

    if objSpssOutputItem.GetType() == SpssClient.OutputItemType.PIVOT:
        objSpssPivotTable = objSpssOutputItem.GetSpecificType()
        objSpssPivotMgr = objSpssPivotTable.PivotManager()
        objSpssPivotMgr.TransposeRowsWithColumns()
finally:
    # Always release the client connection, even if a call above fails.
    SpssClient.StopClient()
def createText(strings=None, otitle="Comment", heading="Comment", pagebreak=False, wrap=None):
    """Create a textblock in the Viewer with contents strings.

    strings is a sequence of lines of text to insert in the block.  If
    omitted (or empty), the block will be empty.  The sequence passed in is
    not modified.  ")USER" and ")CURDATE" in the text are replaced by the
    current user name and the current date/time.
    otitle is an optional title to appear in the outline.
    heading is the procedure name that will appear first in the outline and
    the associated item on the right.
    If pagebreak is True and this is version 17 or later, a pagebreak is
    inserted.
    wrap, if not None, is the width at which the plain text is wrapped.

    If the text appears to be html or rtf, it is inserted with formatting
    (using a scripting api) if 17 or later.

    Raises ValueError if the procedure cannot be started.
    """
    try:
        spss.StartProcedure(heading)
    except Exception:
        raise ValueError(
            _("Error: There must be a nonempty active dataset before using this command."))

    user = getpass.getuser()
    curdate = time.asctime()
    if strings is None:
        strings = [""]
    # Work on a copy so that the caller's sequence is left untouched; an
    # empty sequence is treated as one empty line.
    lines = [s.replace(")USER", user).replace(")CURDATE", curdate)
             for s in strings] or [""]
    start = lines[0][:7].lower()
    version = spss.GetDefaultPlugInVersion()[4:]

    # The rtf code below screams for an r prefix, but the start text is
    # coming through non-raw
    formatted = start.startswith("<html>") or start.startswith("{\rtf")
    if not formatted or version < "170":
        text, nitems = reducer(lines)
        if wrap is not None:
            text = "\n".join(textwrap.wrap(text, width=wrap, break_long_words=False))
        # lines are appended at once for better performance
        spss.TextBlock(otitle, text)
        spss.EndProcedure()
    else:
        spss.TextBlock(otitle, "")
        spss.EndProcedure()
        # do the rest with scripting apis
        import SpssClient
        SpssClient.StartClient()
        try:
            # text block should have arrived in Viewer, but pause briefly
            time.sleep(.1)
            odoc = SpssClient.GetDesignatedOutputDoc()
            items = odoc.GetOutputItems()
            # Allow for some delay in the text block getting into the Viewer
            for attempt in range(5):
                size = items.Size()
                item = items.GetItemAt(size - 1)
                if item.GetType() == SpssClient.OutputItemType.TEXT:
                    break
                time.sleep(.5)
            specificitem = item.GetSpecificType()
            specificitem.SetTextContents("".join(lines))
            item.SetDescription(otitle)
            # Hide the item that precedes the text block.
            items.GetItemAt(size - 2).SetVisible(False)
            if pagebreak:
                item.SetPageBreak(True)
        finally:
            # Release the client connection even if a call above fails.
            SpssClient.StopClient()
        return

    if pagebreak and version >= '170':
        import SpssClient
        try:
            SpssClient.StartClient()
            items = SpssClient.GetDesignatedOutputDoc().GetOutputItems()
            item = items.GetItemAt(items.Size() - 1)
            item.SetPageBreak(True)
        except Exception:
            # Best effort: a failed page break must not fail the command.
            pass
        finally:
            SpssClient.StopClient()
#!/usr/bin/env python # -*- coding: utf-8 -*- import SpssClient SpssClient.StartClient() SpssClient.RunSyntax(r""" *Running spss code.