def get_median_cut():
    """Build composite dependent variables in SPSS and dichotomise them at the median.

    Reads these names from the enclosing module (none are parameters):
    ``db_glob`` (a table with ``dependent`` and ``items`` columns plus the
    column named by ``version``; presumably a pandas DataFrame), ``version``,
    ``prefix``, ``start``, ``stop`` and ``folder``.

    Steps:
      1. Select rows of ``db_glob`` where ``dependent == 1`` AND the
         ``version`` column is not null.
      2. For every distinct value ``dep`` of that column and every time point
         in ``range(start, stop)``, submit
         ``COMPUTE <prefix><time><dep>=mean.1(<prefix><time><item>, ...)``.
      3. Run FREQUENCIES with /STATISTICS=MEDIAN on the ``start`` time-point
         composites, dump the XML output to ``<folder>/fest.xml`` and read the
         (varName, number) attribute pairs from it.
      4. For every time point, submit a RECODE that writes ``<var>_cut``
         using the median read for the ``start`` time point.

    Returns:
        list of str: the composite variable names for the ``start`` time
        point (``prefix + str(start) + dep``).
    """
    # Parentheses are required: ``&`` binds tighter than ``==``, so the
    # unparenthesised form compared ``dependent`` with ``(1 & notnull)``
    # instead of AND-ing the two conditions.
    dependent_mask = (db_glob['dependent'] == 1) & db_glob[version].notnull()
    input_dep = db_glob[dependent_mask]
    dep_names = input_dep[version].unique()

    # --- composite scores: mean of the items belonging to each dependent ---
    compute_cmds = []
    for dep in dep_names:
        items = input_dep['items'][input_dep[version] == dep]
        for time_point in range(start, stop):
            item_vars = [prefix + str(time_point) + var for var in items]
            compute_cmds.append(
                'COMPUTE {var}=mean.1({var_list}).\n'.format(
                    var=prefix + str(time_point) + dep,
                    var_list=','.join(item_vars)))
    compute_cmds.append('EXECUTE.\n')
    spss.Submit(''.join(compute_cmds))

    # --- medians of the composites at the first time point ---
    list_of_dep = [prefix + str(start) + dep for dep in dep_names]
    tag, err = spssaux.CreateXMLOutput(
        """FREQUENCIES VARIABLES={vars} /STATISTICS=MEDIAN.\n""".format(
            vars=' '.join(list_of_dep)))
    try:
        spss.GetXmlUtf16(tag, folder + '/fest.xml')
        context = "/outputTree"
        xpath = ("//category[@text='Median']/dimension[@text='Variables']"
                 "/category[@variable='true']"
                 "//@*[name()='number' or name()='varName']")
        raw_medians = spss.EvaluateXPath(tag, context, xpath)
    finally:
        # Release the XML workspace item, as list_of_cut does.
        spss.DeleteXPathHandle(tag)

    # raw_medians is consumed as alternating [varName, number, ...] entries.
    # NOTE(review): ``[4:]`` assumes ``prefix + str(start)`` is exactly four
    # characters long - confirm against the real variable naming scheme.
    median_list = [
        [prefix + str(i) + raw_medians[k][4:], raw_medians[k + 1]]
        for i in range(start, stop)
        for k in range(0, len(raw_medians), 2)
    ]

    # --- dichotomise every time point at the start-time median ---
    recode_cmds = [
        'RECODE {var} (Lowest thru {mean}=0) (sysmis,77,88,99,0=sysmis) (else=1) into {var}_cut.\n'.format(
            var=var_name, mean=median)
        for var_name, median in median_list
    ]
    recode_cmds.append('EXECUTE.\n')
    spss.Submit(''.join(recode_cmds))
    return list_of_dep
def list_of_cut(indep):
    """Return the sorted distinct valid values that FREQUENCIES reports for ``indep``.

    Sets ``TNumbers=Values ONumbers=Labels OVars=Labels``, runs FREQUENCIES on
    ``indep`` into the XML workspace, collects the ``number`` attribute of
    every category under the "Valid" group of the Frequencies pivot table,
    releases the workspace handle and returns the de-duplicated values sorted.

    NOTE(review): the values are sorted exactly as EvaluateXPath returns them;
    if those are strings the order is lexicographic, not numeric.
    """
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    handle, _err = spssaux.CreateXMLOutput(
        'FREQUENCIES VARIABLES={}.'.format(indep))
    values = spss.EvaluateXPath(
        handle,
        '/outputTree',
        '//pivotTable[@subType="Frequencies"]//group[@text="Valid"]//category/@number',
    )
    spss.DeleteXPathHandle(handle)
    distinct_values = set(values)
    return sorted(distinct_values)
def getAllDatasetNames():
    """Return a list of all dataset names currently in use in lower case

    Runs DATASET DISPLAY under OMS into a randomly tagged XML workspace item,
    reads the dataset names from the "Datasets" pivot table, deletes the
    workspace item and returns the names lower-cased.  If there are no real
    datasets, the name (unnamed) or its translation will be in the table.
    """
    tag = "D" + str(random.uniform(.1, 1))
    # One SPSS command per line: OMS ... / DATASET DISPLAY / OMSEND.
    oms_syntax = """OMS select tables /IF COMMAND='Dataset Display'/DESTINATION xmlworkspace='%(tag)s' VIEWER=NO /TAG="%(tag)s".
DATASET DISPLAY.
OMSEND /TAG='%(tag)s'.""" % {"tag": tag}
    spss.Submit(oms_syntax)

    # Columns were added to the Datasets pivot table in V22, so the name has
    # to be picked from the first column category there.
    xpathexpr = (
        '//pivotTable[@subType="Datasets"]//category/dimension[@axis="column"]/category[position()=1]/cell/@text'
        if v22ok
        else '//pivotTable[@subType="Datasets"]//cell/@text'
    )
    names = spss.EvaluateXPath(tag, "/", xpathexpr)
    spss.DeleteXPathHandle(tag)

    lowered = []
    for name in names:
        lowered.append(name.lower())
    return lowered
def GetWeightSum(varlist=None):
    """Return the sum of the case weights as calculated by DESCRIPTIVES

    varlist is an optional variable list that would cause cases to be
    listwise deleted.  When it is omitted the active weight variable is used.
    If weights are not on (or varlist is empty), a temporary constant
    variable is computed and analysed instead, so the return value is the
    unweighted number of cases.

    Returns the last ``number`` cell of the last category of the
    "Descriptive Statistics" pivot table, converted to float.
    Raises IndexError if DESCRIPTIVES produced no such cells.
    """
    if varlist is None:
        weightvar = spss.GetWeightVar()
        # Wrapping a missing weight variable in a list would yield the truthy
        # [None]; map "no weight" to an empty list so the fallback below runs.
        varlist = [weightvar] if weightvar else []
    if not varlist:
        # random.random() takes no arguments; uniform() is the call that
        # accepts a range (same tag scheme as getAllDatasetNames).
        varlist = ["V" + str(random.uniform(.1, 1))]
        # TEMPORARY keeps the scratch variable alive only for the next
        # procedure, i.e. the DESCRIPTIVES run below.
        spss.Submit("TEMPORARY.\nCOMPUTE %s = 0." % varlist[0])

    tag, err = spssaux.CreateXMLOutput(
        "DESCRIPTIVES %s /STATISTICS=MIN." % " ".join(varlist),
        omsid='Descriptives')
    try:
        stats = spss.EvaluateXPath(
            tag, "/",
            """//pivotTable[@subType="Descriptive Statistics"]//dimension/category[last()]/*//cell/@number""")
    finally:
        # Always release the XML workspace item, even if the query fails.
        spss.DeleteXPathHandle(tag)
    return float(stats[-1])
def _pcaVarianceExplained(handle, groupLabel):
    """Read the Total / % of Variance / Cumulative % cells of one sums-of-squares group.

    handle is the XML workspace tag string; groupLabel is the ``text`` of the
    category to read (e.g. "Extraction Sums of Squared Loadings").
    Returns a 3-item list, each item passed through
    ``_spssOutputTableConversion(values, 1)``.
    """
    xpathTemplate = ('//pivotTable//category[@text="%s"]'
                     '/dimension["Statistics"]/category[@text="%s"]/cell/@number')
    rawColumns = [
        spss.EvaluateXPath(handle, "/outputTree",
                           xpathTemplate % (groupLabel, statistic))
        for statistic in ("Total", "% of Variance", "Cumulative %")
    ]
    return [_spssOutputTableConversion(column, 1) for column in rawColumns]


def PCA(StandardizedPCAInput, varList, regionId):
    """
    Use SPSS python api to perform PCA

    Arguments:
        StandardizedPCAInput - 2d python list for PCA input (one row per case)
        varList - a list of variables for each columns in the PCA input
        regionId - region identifier; used in the warning message and to
                   trigger a debug dump of the input when it equals 18

    Returns (in this order):
        CorrelationMatrix - Correlation matrix
        NonpositiveDefiniteCorM - True when the KMO and Bartlett table came
                   back empty (KMO and Bartlett_sig are then left at 0.)
        KMO - Kaiser-Mayer-Olkin value
        Bartlett_sig - Significance value of Bartlett's Sphericity Test
        Communalities - Communalities of extracted components
        VarExplainedInfo - Variance explained from unrotated solution,
                   including absolute variance, % of variance, and cummulative %
        RotatedVarExplainedInfo - Same three columns for the rotated solution
        ComponentMatrix - Unrotated component loading matrix
        RotatedComponentMatrix - Rotated component loading matrix
        ComponentScoreCoefficientMatrix - Component score coefficient derived
                   from rotated solution
        ComponentScore - Component score derived from score coefficient
                   (read back from the variables saved by /SAVE REG(ALL))

    Side effects: replaces the active SPSS dataset (NEW FILE), turns SPSS
    output off, and emits a RuntimeWarning when the unrotated and rotated
    solutions report a different number of components.
    """
    # SPSS command & dataset setup
    spss.Submit("NEW FILE")
    with spss.DataStep():
        datasetObj = spss.Dataset()
        for var in varList:
            datasetObj.varlist.append(var)
        for row in StandardizedPCAInput:
            datasetObj.cases.append(row)

    if regionId == 18:
        # Developer debug dump.  np.savetxt takes (fname, X) and the file name
        # needs both the directory and the region id.  The directory is
        # machine specific, so a failed write must not abort the analysis.
        debugFileOutputDir = r'C:\Users\hxiong\Dropbox\Haoyi Vulnerability\Simulation\Hurricane_Sandy'
        try:
            np.savetxt(r'%s\PCAInut_r%d' % (debugFileOutputDir, regionId),
                       np.array(StandardizedPCAInput), fmt='%.7f')
        except (IOError, OSError) as ex:
            warnings.warn("Region %d: debug dump skipped (%s)" % (regionId, ex),
                          RuntimeWarning)

    spssPCASyntax = """FACTOR
 /VARIABLES {0}
 /MISSING LISTWISE
 /ANALYSIS {0}
 /PRINT UNIVARIATE INITIAL CORRELATION KMO EXTRACTION ROTATION FSCORE
 /CRITERIA MINEIGEN(1) ITERATE(25)
 /EXTRACTION PC
 /CRITERIA ITERATE(100)
 /ROTATION VARIMAX
 /SAVE REG(ALL)
 /METHOD=CORRELATION.""".format(' '.join(varList))
    spss.SetOutput("off")
    varNum = len(varList)

    # Create XML output from SPSS.  ``tag`` is kept exactly as returned and
    # passed as-is to spssaux; ``tag[0]`` is the workspace handle string.
    tag = spssaux.CreateXMLOutput(spssPCASyntax, omsid='Factor Analysis')
    try:
        # Get correlation matrix
        CorrelationMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Correlation Matrix', cellAttrib="number")
        CorrelationMatrix = _spssOutputTableConversion(
            CorrelationMatrix, varNum, varNum)

        # Get KMO and Bartlett test sig.
        KMO_and_Bartlett = spssaux.getValuesFromXmlWorkspace(
            tag, 'KMO and Bartlett Test', cellAttrib="number")
        KMO_and_Bartlett = _spssOutputTableConversion(KMO_and_Bartlett, 1)
        NonpositiveDefiniteCorM = False
        KMO = 0.
        Bartlett_sig = 0.
        if len(KMO_and_Bartlett) == 0:
            # No KMO table at all is treated as a non-positive-definite
            # correlation matrix.
            NonpositiveDefiniteCorM = True
        else:
            KMO = KMO_and_Bartlett[0]
            Bartlett_sig = KMO_and_Bartlett[3]

        # Get Communalities
        Communalities = spssaux.getValuesFromXmlWorkspace(
            tag, 'Communalities', colCategory="Extraction", cellAttrib="number")
        Communalities = _spssOutputTableConversion(Communalities, 1)

        # Get variances explained in unrotated and rotated solutions
        VarExplainedInfo = _pcaVarianceExplained(
            tag[0], "Extraction Sums of Squared Loadings")
        RotatedVarExplainedInfo = _pcaVarianceExplained(
            tag[0], "Rotation Sums of Squared Loadings")
        VarExplained = VarExplainedInfo[0]
        RotatedVarExplained = RotatedVarExplainedInfo[0]

        # Get number of extracted components
        if len(VarExplained) != len(RotatedVarExplained):
            w = "Region %d: unrotated and rotated solution finds different number of component based on Kaiser Criterion." % regionId
            warnings.warn(w, RuntimeWarning)
        CompNum = len(VarExplained)
        # Saved score variables are read from the columns directly after the
        # input variables; ``range`` works on both Python 2 and 3.
        ComponentScoreColumnIndex = list(range(varNum, varNum + CompNum))

        # Get component matrix
        ComponentMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Factor Matrix', cellAttrib="number")
        ComponentMatrix = _spssOutputTableConversion(
            ComponentMatrix, CompNum, varNum)

        # Get rotated component matrix
        RotatedComponentMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Rotated Factor Matrix', cellAttrib="number")
        RotatedComponentMatrix = _spssOutputTableConversion(
            RotatedComponentMatrix, CompNum, varNum)

        # Get component score coefficient matrix
        ComponentScoreCoefficientMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Factor Score Coefficient Matrix', cellAttrib="number")
        ComponentScoreCoefficientMatrix = _spssOutputTableConversion(
            ComponentScoreCoefficientMatrix, CompNum, varNum)
    finally:
        # The workspace item is local to this call; release it so repeated
        # calls (one per region) do not pile up XML output in memory.
        spss.DeleteXPathHandle(tag[0])

    # Get component score
    dataCursor = spss.Cursor(ComponentScoreColumnIndex)
    try:
        ComponentScore = dataCursor.fetchall()
    finally:
        dataCursor.close()

    return (CorrelationMatrix, NonpositiveDefiniteCorM, KMO, Bartlett_sig,
            Communalities, VarExplainedInfo, RotatedVarExplainedInfo,
            ComponentMatrix, RotatedComponentMatrix,
            ComponentScoreCoefficientMatrix, ComponentScore)
def log_reg(dep, indep, cutoff, n):
    """Run CROSSTABS and LOGISTIC REGRESSION for ``{dep}_cut`` by ``{indep}_cut``.

    The SPSS output is captured in the XML workspace item ``log_table`` and
    read back for two result sets: the category with
    ``varName='Imputation_'`` and ``text='0'`` (row ``n``, ``pool`` = 0) and
    the category with ``text='Pooled'`` (row ``n + 1``, ``pool`` = 1).

    Arguments:
        dep    - dependent variable name without the ``_cut`` suffix
        indep  - independent variable name without the ``_cut`` suffix; the
                 name stored in the result has its last 7 characters removed
        cutoff - stored unchanged in the ``cutoff`` column
        n      - index label of the first result row (second is ``n + 1``)

    Returns:
        (db, db_err) - two DataFrames with identical columns and index
        ``[n, n + 1]``.  ``db`` holds the four crosstab cell counts
        (``_00``..``_11``), ``SE``, ``RR`` (Exp(B)), ``RR_low``/``RR_high``
        (Lower/Upper) and the derived ``%dep`` / ``%indep`` shares.
        ``db_err`` carries the same identifying columns and, when something
        could not be parsed or had too few values, the raw values under
        ``_00`` / ``RR`` (conversion failures) or ``_01`` / ``SE``
        (too few values).

    Diagnostics are printed to stdout as before (``n``, exception type, args).
    """
    columns = ['dep', 'indep', 'cutoff', 'RR', 'RR_low', 'RR_high', 'SE',
               '_00', '_01', '_10', '_11', 'pool', '%dep', '%indep']
    cell_cols = ['_00', '_01', '_10', '_11']
    stat_cols = ['SE', 'RR', 'RR_low', 'RR_high']
    workspace = 'log_table'
    context = "/outputTree"

    def new_rows():
        # One record per result set.  Rows are collected as plain dicts and
        # turned into DataFrames at the end, which avoids chained
        # ``df.loc[row][col] = value`` writes that may hit a temporary copy.
        rows = []
        for pool in range(2):
            row = dict.fromkeys(columns, np.nan)
            row.update({'dep': dep, 'indep': indep[:-7],
                        'cutoff': cutoff, 'pool': pool})
            rows.append(row)
        return rows

    def report(ex):
        print(n)
        print(type(ex).__name__)
        print(ex.args)

    rows = new_rows()
    err_rows = new_rows()

    cmd = """OMS SELECT TABLES
 /DESTINATION FORMAT=OXML XMLWORKSPACE='log_table'.
CROSSTABS
 /TABLES={dep}_cut BY {indep}_cut
 /FORMAT=AVALUE TABLES
 /CELLS=COUNT
 /COUNT ROUND CELL.
LOGISTIC REGRESSION VARIABLES {dep}_cut
 /METHOD=ENTER {indep}_cut
 /PRINT=CI(95)
 /CRITERIA=PIN(.05) POUT(.10) ITERATE(20) CUT(.5).
OMSEND.""".format(dep=dep, indep=indep)
    spss.Submit(cmd)

    dep_cut = dep + '_cut'
    indep_cut = indep + '_cut'
    xpath_0_cells = (
        "//pivotTable[@subType='Crosstabulation']//category[@varName='Imputation_' and @text='0']"
        "//group[@text='{dep}']//category[@varName='{indep}']//cell/@number"
    ).format(dep=dep_cut, indep=indep_cut)
    xpath_pool_cells = (
        "//pivotTable[@subType='Crosstabulation']//category[@text='Pooled']"
        "//group[@text='{dep}']//category[@varName='{indep}']//cell/@number"
    ).format(dep=dep_cut, indep=indep_cut)
    xpath_0_log_reg = (
        "//command[@command='Logistic Regression']//pivotTable[@subType='Variables in the Equation']"
        "//category[@label='Original data']//category[@text='{indep}']"
        "//category[@text='S.E.' or @text='Exp(B)' or @text='Lower' or @text='Upper']//cell/@number"
    ).format(indep=indep_cut)
    xpath_pool_log_reg = (
        "//command[@command='Logistic Regression']//pivotTable[@subType='Variables in the Equation']"
        "//category[@text='Pooled']//category[@text='{indep}']"
        "//category[@text='S.E.' or @text='Exp(B)' or @text='Lower' or @text='Upper']//cell/@number"
    ).format(indep=indep_cut)

    # --- crosstab cell counts -------------------------------------------
    # Query first, convert second, so the raw values are still at hand for
    # the error frame and the names below are always bound.
    raw_0_cells = spss.EvaluateXPath(workspace, context, xpath_0_cells)
    raw_pool_cells = spss.EvaluateXPath(workspace, context, xpath_pool_cells)
    cells_0, cells_pool = [], []
    try:
        cells_0 = [float(v) for v in raw_0_cells]
        cells_pool = [float(v) for v in raw_pool_cells]
    except (TypeError, ValueError) as ex:
        report(ex)
        err_rows[0]['_00'] = raw_0_cells
        err_rows[1]['_00'] = raw_pool_cells

    # --- logistic regression statistics -----------------------------------
    stats_0, stats_pool = [], []
    try:
        raw_0_stats = spss.EvaluateXPath(workspace, context, xpath_0_log_reg)
        raw_pool_stats = spss.EvaluateXPath(workspace, context, xpath_pool_log_reg)
    except Exception as ex:  # spss raises its own error type; stay best-effort
        report(ex)
    else:
        try:
            stats_0 = [float(v) for v in raw_0_stats]
            stats_pool = [float(v) for v in raw_pool_stats]
        except (TypeError, ValueError) as ex:
            report(ex)
            # Fall back to the unconverted values and log them.
            stats_0, stats_pool = raw_0_stats, raw_pool_stats
            err_rows[0]['RR'] = raw_0_stats
            err_rows[1]['RR'] = raw_pool_stats

    # --- store results: cell order is _00,_01,_10,_11; the statistics are
    # consumed in the order S.E., Exp(B), Lower, Upper ----------------------
    for offset, cells, stats in ((0, cells_0, stats_0),
                                 (1, cells_pool, stats_pool)):
        try:
            for pos, col in enumerate(cell_cols):
                rows[offset][col] = cells[pos]
            for pos, col in enumerate(stat_cols):
                rows[offset][col] = stats[pos]
        except IndexError as ex:
            report(ex)
            err_rows[offset]['_01'] = cells
            err_rows[offset]['SE'] = stats

    index = [n, n + 1]
    db = pd.DataFrame(rows, index=index, columns=columns)
    db_err = pd.DataFrame(err_rows, index=index, columns=columns)

    total = db[cell_cols].sum(axis=1)
    db['%dep'] = db[['_10', '_11']].sum(axis=1) / total
    db['%indep'] = db[['_01', '_11']].sum(axis=1) / total

    # Progress indicator roughly every 100 rows.
    if n % 100 == 0 or (n + 1) % 100 == 0:
        print(n)
    return db, db_err
def SetMacroFromVariableSets(setnames=None, macroname=None, fail=False, outfile=None, sep=" "):
    """Define a macro consisting of all the variables in the specified variable sets.  Return set of variables.

    setnames is a string or sequence of variable set names to include.  These are not case sensitive.
    The union of the names will be returned in an arbitrary order.  If not specified, all sets are included
    macroname is the name to assign to the macro.  If not specified no macro is created.
    fail specifies whether or not to raise an exception (ValueError) if any set in the list is not found.
    By default, sets not found are ignored.
    sep is the separator string to use between variables.  If it contains neither a blank nor a tab,
    it is padded with a blank on each side so that the text can be wrapped.
    if outfile is specified, the variable names are written to that file.  If a macroname is given,
    the names are written with the syntax that defines the macro.
    For version 16 or later, the file is utf-8.  For earlier versions it is written as plain text.

    The (Python) set of variables defined in the sets is returned."""

    if setnames is not None and not _isseq(setnames):
        setnames = setnames.split()
    randomtag = "_SS_" + str(random.randint(0, 999999999))
    spss.CreateXPathDictionary(randomtag)
    variables = set()
    try:
        if setnames is None:
            setvars = spss.EvaluateXPath(randomtag, "/", """//variableSetVariable/@name""")
            if setvars == [] and fail:
                raise ValueError("No set variables found")
            variables = set(setvars)
        else:
            # requested names in lower case
            setnames = [n.lower() for n in setnames]
            # available names, actual case
            dssetnames = spss.EvaluateXPath(randomtag, "/", """//variableSet/@name""")
            # key is lowercase, value is actual case
            dssetnamesdict = dict((n.lower(), n) for n in dssetnames)
            for name in setnames:
                # retrieve requested names by actual case; an unknown name
                # queries the empty set name and so yields no variables
                setvars = spss.EvaluateXPath(
                    randomtag, "/",
                    """/dictionary/variableSet[@name="%s"]/variableSetVariable/@name"""
                    % dssetnamesdict.get(name, ""))
                if setvars == [] and fail:
                    raise ValueError("Variable set name not found: %s" % name)
                variables.update(setvars)
    finally:
        spss.DeleteXPathHandle(randomtag)

    # separator must contain whitespace or textwrap will not work properly
    if not (" " in sep or "\t" in sep):
        sep = " " + sep + " "

    # Wrapped once and shared by the macro definition and the file output.
    wrapped = textwrap.wrap(sep.join(variables), 80, break_long_words=False)

    if macroname is not None:
        spss.SetMacroValue(macroname, "\n".join(wrapped))

    if outfile is not None:
        if getSpssMajorVersion() >= 16:
            # write a utf-8 file
            f = codecs.open(outfile, "wb", encoding="utf_8_sig")
        else:
            f = open(outfile, "w")
        # ``with`` guarantees the file is closed even if a write fails.
        with f:
            if macroname is not None:
                f.write("DEFINE %s ()\n" % macroname)
            f.writelines([line + "\n" for line in wrapped])
            if macroname is not None:
                f.write("!ENDDEFINE.\n")
    return variables