# Example #1
0
def get_median_cut():
    """Compute dependent scale scores and dichotomise them at the median.

    Reads the module-level names ``db_glob``, ``version``, ``prefix``,
    ``start``, ``stop`` and ``folder`` (none of them is defined here).

    Steps:
      1. Select the rows of ``db_glob`` with ``dependent == 1`` and a
         non-null ``version`` value.
      2. For every distinct ``version`` value and every number in
         ``range(start, stop)``, submit a ``COMPUTE`` that sets the variable
         ``prefix + number + value`` to ``mean.1`` of the matching items.
      3. Run ``FREQUENCIES ... /STATISTICS=MEDIAN`` on the ``start``
         variables, dump the XML to ``folder + '/fest.xml'`` and read the
         medians back through XPath.
      4. Submit one ``RECODE ... into <var>_cut`` per variable and number in
         ``range(start, stop)``, always using the median obtained in step 3.

    Returns:
        list of str: the names ``prefix + str(start) + value`` used in the
        FREQUENCIES command.
    """
    # `&` binds tighter than `==`; without the parentheses the expression
    # compared `dependent` against `1 & notnull`, which also kept rows with
    # dependent == 0 and a missing version.
    is_dependent = (db_glob['dependent'] == 1) & db_glob[version].notnull()
    input_dep = db_glob[is_dependent]
    dep_names = input_dep[version].unique()

    compute_lines = []
    for dep in dep_names:
        items = input_dep['items'][input_dep[version] == dep]
        for wave in range(start, stop):
            wave_prefix = prefix + str(wave)
            item_vars = [wave_prefix + item for item in items]
            compute_lines.append(
                'COMPUTE {var}=mean.1({var_list}).\n'.format(
                    var=wave_prefix + dep, var_list=','.join(item_vars)))
    compute_lines.append('EXECUTE.\n')
    spss.Submit(''.join(compute_lines))

    list_of_dep = [prefix + str(start) + dep for dep in dep_names]
    tag, err = spssaux.CreateXMLOutput("""FREQUENCIES VARIABLES={vars}
        /STATISTICS=MEDIAN.\n""".format(vars=' '.join(list_of_dep)))
    try:
        spss.GetXmlUtf16(tag, folder + '/fest.xml')
        context = "/outputTree"
        xpath = "//category[@text='Median']/dimension[@text='Variables']/category[@variable='true']//@*[name()='number' or name()='varName']"
        median_list = spss.EvaluateXPath(tag, context, xpath)
    finally:
        # Release the XML workspace entry even if the lookup fails.
        spss.DeleteXPathHandle(tag)

    # The result is consumed as a flat list of (varName, number) pairs.
    # NOTE(review): the [4:] slice assumes prefix + str(start) is exactly
    # four characters long -- confirm against the actual naming scheme.
    medians = [(median_list[i][4:], median_list[i + 1])
               for i in range(0, len(median_list), 2)]
    recode_lines = [
        'RECODE {var} (Lowest thru {mean}=0) (sysmis,77,88,99,0=sysmis) (else=1) into {var}_cut.\n'.format(
            var=prefix + str(wave) + name, mean=median)
        for wave in range(start, stop)
        for name, median in medians
    ]
    recode_lines.append('EXECUTE.\n')
    spss.Submit(''.join(recode_lines))
    return list_of_dep
# Example #2
0
def list_of_cut(indep):
    """Return the sorted distinct ``@number`` values FREQUENCIES reports for ``indep``.

    ``indep`` is inserted verbatim into ``FREQUENCIES VARIABLES=...``; the
    values come from the categories of the "Valid" group of the Frequencies
    pivot table and are sorted in whatever form ``EvaluateXPath`` returns
    them.
    """
    # Display settings are submitted before the table is produced.
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    handle, _err = spssaux.CreateXMLOutput(
        'FREQUENCIES VARIABLES={}.'.format(indep))
    numbers = spss.EvaluateXPath(
        handle,
        '/outputTree',
        '//pivotTable[@subType="Frequencies"]//group[@text="Valid"]//category/@number')
    spss.DeleteXPathHandle(handle)
    distinct = set(numbers)
    return sorted(distinct)
def getAllDatasetNames():
    """Return the names of all datasets currently in use, lower-cased.

    Runs DATASET DISPLAY inside an OMS request that routes the table to a
    randomly named XML workspace entry, reads the names through XPath and
    deletes the workspace entry again.
    """
    handle = "D" + str(random.uniform(.1, 1))
    syntax = """OMS select tables /IF COMMAND='Dataset Display'/DESTINATION xmlworkspace='%(tag)s' VIEWER=NO
        /TAG="%(tag)s".
    DATASET DISPLAY.
    OMSEND /TAG='%(tag)s'."""
    spss.Submit(syntax % {"tag": handle})

    # Columns were added to the Datasets pivot table in V22, so the name has
    # to be picked from the first column category there.
    xpathexpr = (
        '//pivotTable[@subType="Datasets"]//category/dimension[@axis="column"]/category[position()=1]/cell/@text'
        if v22ok else
        '//pivotTable[@subType="Datasets"]//cell/@text')

    # With no real datasets the table holds the name (unnamed) or its
    # translation.
    names = spss.EvaluateXPath(handle, "/", xpathexpr)
    spss.DeleteXPathHandle(handle)
    return [name.lower() for name in names]
def GetWeightSum(varlist=None):
    """Return the sum of the case weights as calculated by DESCRIPTIVES.

    varlist is an optional variable list that would cause cases to be
    listwise deleted.  When it is omitted the weight variable is used; if
    weighting is off (or an empty list is passed) a temporary constant
    variable is created so DESCRIPTIVES still has something to analyse.
    If weights are not on, the return value is the unweighted number of cases.

    Returns:
        float: the last ``@number`` cell of the last category of the
        "Descriptive Statistics" pivot table.
    """

    if varlist is None:
        # GetWeightVar() yields None when weighting is off; wrapping that in
        # a list made it truthy and skipped the fallback below.
        weightvar = spss.GetWeightVar()
        varlist = [weightvar] if weightvar else []
    if not varlist:
        # random.random() accepts no arguments; uniform() gives the intended
        # random suffix for the scratch variable name.
        varlist = ["V" + str(random.uniform(.1, 1))]
        spss.Submit("""TEMPORARY.
COMPUTE %s = 0.""" % varlist[0])
    tag, err = spssaux.CreateXMLOutput("DESCRIPTIVES %s /STATISTICS=MIN." %
                                       " ".join(varlist),
                                       omsid='Descriptives')
    try:
        stats = spss.EvaluateXPath(
            tag, "/",
            """//pivotTable[@subType="Descriptive Statistics"]//dimension/category[last()]/*//cell/@number"""
        )
    finally:
        # Always release the XML workspace entry.
        spss.DeleteXPathHandle(tag)
    return float(stats[-1])
# Example #5
0
def _pcaVarianceExplained(handle, section):
    """Read one block of the variance-explained output through XPath.

    handle is the XML workspace handle, section the ``@text`` of the block
    (e.g. "Extraction Sums of Squared Loadings").  Returns a three-item list
    [Total, % of Variance, Cumulative %], each converted with
    ``_spssOutputTableConversion(values, 1)``.
    """
    xpathTemplate = (
        '//pivotTable//category[@text="%s"]/dimension["Statistics"]'
        '/category[@text="%s"]/cell/@number')
    return [
        _spssOutputTableConversion(
            spss.EvaluateXPath(handle, "/outputTree",
                               xpathTemplate % (section, statistic)), 1)
        for statistic in ("Total", "% of Variance", "Cumulative %")
    ]


def PCA(StandardizedPCAInput, varList, regionId):
    """ Use SPSS python api to perform PCA

        Arguments:
        StandardizedPCAInput - 2d python list for PCA input (one row per case)
        varList - a list of variables for each columns in the PCA input
        regionId - region number; used in the warning message and, for
                   region 18 only, to dump the input matrix to a debug file

        Returns (as a tuple, in this order):
        CorrelationMatrix - Correlation matrix
        NonpositiveDefiniteCorM - True when the KMO and Bartlett table came
                                  back empty, otherwise False
        KMO - Kaiser-Mayer-Olkin value (0. when the table is empty)
        Bartlett_sig - Significance value of Bartlett's Sphericity Test
                       (0. when the table is empty)
        Communalities - Communalities of extracted components
        VarExplainedInfo - Variance explained from unrotated solution, including absolute variance, % of variance, and cumulative %
        RotatedVarExplainedInfo - Rotated variance explained, including absolute variance, % of variance, and cumulative %
        ComponentMatrix - Unrotated component loading matrix
        RotatedComponentMatrix - Rotated component loading matrix
        ComponentScoreCoefficientMatrix - Component score coefficient derived from rotated solution
        ComponentScore - Component score derived from score coefficient
    """

    # SPSS command & dataset setup
    spss.Submit("NEW FILE")
    with spss.DataStep():
        datasetObj = spss.Dataset()
        for var in varList:
            datasetObj.varlist.append(var)
        for row in StandardizedPCAInput:
            datasetObj.cases.append(row)
    if regionId == 18:
        # Debug dump of the input matrix.  savetxt takes the file name first
        # and the data second, and the path template needs both values.
        debugFileOutputDir = r'C:\Users\hxiong\Dropbox\Haoyi Vulnerability\Simulation\Hurricane_Sandy'
        np.savetxt(r'%s\PCAInut_r%d' % (debugFileOutputDir, regionId),
                   np.array(StandardizedPCAInput),
                   fmt='%.7f')
    spssPCASyntax = """FACTOR 
    /VARIABLES {0}
    /MISSING LISTWISE 
    /ANALYSIS {0}
    /PRINT UNIVARIATE INITIAL CORRELATION KMO EXTRACTION ROTATION FSCORE 
    /CRITERIA MINEIGEN(1) ITERATE(25) 
    /EXTRACTION PC 
    /CRITERIA ITERATE(100) 
    /ROTATION VARIMAX 
    /SAVE REG(ALL) 
    /METHOD=CORRELATION.""".format(' '.join(varList))
    spss.SetOutput("off")
    varNum = len(varList)
    # Create XML output from SPSS
    # NOTE(review): CreateXMLOutput's result is indexed with [0] for the raw
    # XPath calls but passed whole to getValuesFromXmlWorkspace -- kept as
    # in the original; confirm the helper accepts that.
    tag = spssaux.CreateXMLOutput(spssPCASyntax, omsid='Factor Analysis')
    try:
        # Get correlation matrix
        CorrelationMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Correlation Matrix', cellAttrib="number")
        CorrelationMatrix = _spssOutputTableConversion(
            CorrelationMatrix, varNum, varNum)
        # Get KMO and Bartlett test sig.
        KMO_and_Bartlett = spssaux.getValuesFromXmlWorkspace(
            tag, 'KMO and Bartlett Test', cellAttrib="number")
        KMO_and_Bartlett = _spssOutputTableConversion(KMO_and_Bartlett, 1)
        NonpositiveDefiniteCorM = False
        KMO = 0.
        Bartlett_sig = 0.
        if len(KMO_and_Bartlett) == 0:
            # An empty table is taken as the sign of a correlation matrix
            # that is not positive definite.
            NonpositiveDefiniteCorM = True
        else:
            KMO = KMO_and_Bartlett[0]
            Bartlett_sig = KMO_and_Bartlett[3]
        # Get Communalities
        Communalities = spssaux.getValuesFromXmlWorkspace(
            tag, 'Communalities', colCategory="Extraction",
            cellAttrib="number")
        Communalities = _spssOutputTableConversion(Communalities, 1)
        # Get variances explained in unrotated and rotated solution
        VarExplainedInfo = _pcaVarianceExplained(
            tag[0], "Extraction Sums of Squared Loadings")
        RotatedVarExplainedInfo = _pcaVarianceExplained(
            tag[0], "Rotation Sums of Squared Loadings")
        VarExplained = VarExplainedInfo[0]
        RotatedVarExplained = RotatedVarExplainedInfo[0]
        # Get number of extracted components
        if len(VarExplained) != len(RotatedVarExplained):
            w = "Region %d: unrotated and rotated solution finds different number of component based on Kaiser Criterion." % regionId
            warnings.warn(w, RuntimeWarning)
        CompNum = len(VarExplained)
        # Get component matrix
        ComponentMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Factor Matrix', cellAttrib="number")
        ComponentMatrix = _spssOutputTableConversion(
            ComponentMatrix, CompNum, varNum)
        # Get rotated component matrix
        RotatedComponentMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Rotated Factor Matrix', cellAttrib="number")
        RotatedComponentMatrix = _spssOutputTableConversion(
            RotatedComponentMatrix, CompNum, varNum)
        # Get component score coefficient matrix
        ComponentScoreCoefficientMatrix = spssaux.getValuesFromXmlWorkspace(
            tag, 'Factor Score Coefficient Matrix', cellAttrib="number")
        ComponentScoreCoefficientMatrix = _spssOutputTableConversion(
            ComponentScoreCoefficientMatrix, CompNum, varNum)
    finally:
        # The workspace entry is no longer needed once the tables are read.
        spss.DeleteXPathHandle(tag[0])
    # Get component score: the saved score variables follow the varNum
    # input variables in the active dataset.
    ComponentScoreColumnIndex = list(range(varNum, varNum + CompNum))
    dataCursor = spss.Cursor(ComponentScoreColumnIndex)
    try:
        ComponentScore = dataCursor.fetchall()
    finally:
        dataCursor.close()
    return CorrelationMatrix, NonpositiveDefiniteCorM, KMO, Bartlett_sig, Communalities, VarExplainedInfo, RotatedVarExplainedInfo, ComponentMatrix, RotatedComponentMatrix, ComponentScoreCoefficientMatrix, ComponentScore
# Example #6
0
def log_reg(dep,indep,cutoff,n):
    """Run CROSSTABS and LOGISTIC REGRESSION for one pair of ``_cut`` variables.

    Submits both procedures for ``<dep>_cut`` by ``<indep>_cut`` with the
    output routed to the XML workspace entry ``'log_table'`` and reads the
    numbers back through XPath, once for the original data (imputation 0)
    and once for the pooled result.

    Parameters:
        dep, indep: variable names without the ``_cut`` suffix.  ``indep``
            is stored in the result with its last seven characters removed.
        cutoff: stored unchanged in the ``cutoff`` column.
        n: index label of the first result row; the pooled row gets ``n + 1``.

    Returns:
        (db, db_err): two DataFrames with rows ``n`` (``pool`` = 0) and
        ``n + 1`` (``pool`` = 1).  ``db`` holds the four crosstab cells
        (``_00`` .. ``_11``), ``SE``, ``RR``, ``RR_low``, ``RR_high`` and the
        derived shares ``%dep`` = (_10 + _11) / total and
        ``%indep`` = (_01 + _11) / total.  ``db_err`` receives whatever raw
        values could be retrieved when a step failed.

    Failures are reported with ``print`` and recorded in ``db_err``; they do
    not abort the function.
    """
    columns = ['dep','indep','cutoff','RR','RR_low','RR_high','SE','_00','_01','_10','_11','pool','%dep','%indep']
    cell_cols = ['_00', '_01', '_10', '_11']
    reg_cols = ['SE', 'RR', 'RR_low', 'RR_high']
    rows = [n, n + 1]

    # Object dtype so that single cells can hold strings or raw lists.
    db = pd.DataFrame(index=rows, columns=columns, dtype=object)
    db_err = pd.DataFrame(index=rows, columns=columns, dtype=object)
    labels = {'dep': dep, 'indep': indep[:-7], 'cutoff': cutoff}
    for frame in (db, db_err):
        for pool, row in enumerate(rows):
            # .at writes into the frame itself; the chained form
            # frame.loc[row][col] = value may only modify a temporary copy.
            frame.at[row, 'pool'] = pool
            for key, val in labels.items():
                frame.at[row, key] = val

    def report(ex):
        # Same three diagnostic lines for every failure.
        print(n)
        print(type(ex).__name__)
        print(ex.args)

    def store(row, cols, values):
        # Pick the four values first so a short list raises IndexError
        # before anything is written.
        picked = [values[i] for i in range(len(cols))]
        for col, val in zip(cols, picked):
            db.at[row, col] = val

    cmd = """OMS SELECT TABLES
/DESTINATION FORMAT=OXML XMLWORKSPACE='log_table'.
CROSSTABS
  /TABLES={dep}_cut BY {indep}_cut
  /FORMAT=AVALUE TABLES
  /CELLS=COUNT
  /COUNT ROUND CELL.
LOGISTIC REGRESSION VARIABLES {dep}_cut
  /METHOD=ENTER {indep}_cut
  /PRINT=CI(95)
  /CRITERIA=PIN(.05) POUT(.10) ITERATE(20) CUT(.5).
OMSEND.""".format(dep=dep,indep=indep)
    spss.Submit(cmd)

    context = "/outputTree"
    dep_cut = dep + '_cut'
    indep_cut = indep + '_cut'
    xpath_0_cells = "//pivotTable[@subType='Crosstabulation']//category[@varName='Imputation_' and @text='0']" \
            "//group[@text='{dep}']//category[@varName='{indep}']//cell/@number".format(dep=dep_cut, indep=indep_cut)
    xpath_pool_cells = "//pivotTable[@subType='Crosstabulation']//category[@text='Pooled']" \
            "//group[@text='{dep}']//category[@varName='{indep}']//cell/@number".format(dep=dep_cut, indep=indep_cut)
    xpath_0_log_reg = "//command[@command='Logistic Regression']//pivotTable[@subType='Variables in the Equation']" \
                      "//category[@label='Original data']//category[@text='{indep}']" \
                      "//category[@text='S.E.' or @text='Exp(B)' or @text='Lower' or @text='Upper']//cell/@number".format(indep=indep_cut)
    xpath_pool_log_reg = "//command[@command='Logistic Regression']//pivotTable[@subType='Variables in the Equation']" \
                      "//category[@text='Pooled']//category[@text='{indep}']" \
                      "//category[@text='S.E.' or @text='Exp(B)' or @text='Lower' or @text='Upper']//cell/@number".format(indep=indep_cut)

    # Pre-bind the result lists so later steps never hit an unbound name
    # when a query fails.
    _0_cells, pooled_cells = [], []
    try:
        _0_cells = [float(x) for x in spss.EvaluateXPath('log_table', context, xpath_0_cells)]
        pooled_cells = [float(x) for x in spss.EvaluateXPath('log_table', context, xpath_pool_cells)]
    except Exception as ex:
        report(ex)
        try:
            # Keep the unconverted values for inspection.
            db_err.at[n, '_00'] = spss.EvaluateXPath('log_table', context, xpath_0_cells)
            db_err.at[n + 1, '_00'] = spss.EvaluateXPath('log_table', context, xpath_pool_cells)
        except Exception:
            # The query itself is failing; nothing more to record.
            print(n)

    _0_log_reg, pool_log_reg = [], []
    try:
        _0_log_reg = [float(x) for x in spss.EvaluateXPath('log_table', context, xpath_0_log_reg)]
        pool_log_reg = [float(x) for x in spss.EvaluateXPath('log_table', context, xpath_pool_log_reg)]
    except Exception as ex:
        report(ex)
        try:
            _0_log_reg = spss.EvaluateXPath('log_table', context, xpath_0_log_reg)
            pool_log_reg = spss.EvaluateXPath('log_table', context, xpath_pool_log_reg)
            db_err.at[n, 'RR'] = _0_log_reg
            db_err.at[n + 1, 'RR'] = pool_log_reg
        except Exception:
            print(n)
            _0_log_reg = []
            pool_log_reg = []

    try:
        store(n, cell_cols, _0_cells)
        store(n, reg_cols, _0_log_reg)
    except Exception as ex:
        report(ex)
        db_err.at[n, '_01'] = _0_cells
        db_err.at[n, 'SE'] = _0_log_reg
    try:
        store(n + 1, cell_cols, pooled_cells)
        store(n + 1, reg_cols, pool_log_reg)
    except Exception as ex:
        report(ex)
        # '_01' (with underscore) is the column used for row n as well.
        db_err.at[n + 1, '_01'] = pooled_cells
        db_err.at[n + 1, 'SE'] = pool_log_reg

    # Cells are floats or NaN; convert explicitly so the arithmetic does not
    # depend on how object columns are summed.
    cells = db[cell_cols].astype(float)
    total = cells.sum(axis=1)
    db['%dep'] = cells[['_10', '_11']].sum(axis=1) / total
    db['%indep'] = cells[['_01', '_11']].sum(axis=1) / total
    if n % 100 == 0 or (n + 1) % 100 == 0:
        print(n)
    return db,db_err
def SetMacroFromVariableSets(setnames=None,
                             macroname=None,
                             fail=False,
                             outfile=None,
                             sep=" "):
    """Define a macro consisting of all the variables in the specified variable sets.  Return set of variables.

    setnames is a string or sequence of variable set names to include.  These are not case sensitive.
    The union of the names will be returned in an arbitrary order.  If not specified, all sets are included
    macroname is the name to assign to the macro.  If not specified no macro is created.
    fail specifies whether or not to raise an exception (ValueError) if any set in the list is not found
    or, when no names are given, if no set variables exist at all.
    By default, sets not found are ignored.
    sep is the separator string to use between variables
    if outfile is specified, the variable names are written to that file.  If a macroname is given,
    the names are written with the syntax that defines the macro.
    For version 16 or later, the file is utf-8.  For earlier versions it is written as plain text.

    The (Python) set of variables defined in the sets is returned."""

    if setnames is not None and not _isseq(setnames):
        setnames = setnames.split()
    randomtag = "_SS_" + str(random.randint(0, 999999999))
    spss.CreateXPathDictionary(randomtag)
    variables = set()
    try:
        if setnames is None:
            setvars = spss.EvaluateXPath(randomtag, "/",
                                         """//variableSetVariable/@name""")
            if not setvars and fail:
                raise ValueError("No set variables found")
            variables = set(setvars)
        else:
            requested = [n.lower() for n in setnames]  # requested names in lower case
            dssetnames = spss.EvaluateXPath(
                randomtag, "/",
                """//variableSet/@name""")  # available names, actual case
            # key is lowercase, value is actual case
            actualcase = {n.lower(): n for n in dssetnames}
            for name in requested:
                # retrieve requested names by actual case; an unknown name
                # queries for "" and therefore matches nothing
                setvars = spss.EvaluateXPath(
                    randomtag, "/",
                    """/dictionary/variableSet[@name="%s"]/variableSetVariable/@name"""
                    % actualcase.get(name, ""))
                if not setvars and fail:
                    raise ValueError("Variable set name not found: %s" % name)
                variables.update(setvars)
    finally:
        spss.DeleteXPathHandle(randomtag)

    # separator must contain whitespace or textwrap will not work properly
    if not (" " in sep or "\t" in sep):
        sep = " " + sep + " "

    wantmacro = macroname is not None
    wantfile = outfile is not None
    if wantmacro or wantfile:
        # Wrapped once and shared by the macro value and the file output.
        tw = textwrap.wrap(sep.join(variables), 80, break_long_words=False)
    if wantmacro:
        spss.SetMacroValue(macroname, "\n".join(tw))
    if wantfile:
        if getSpssMajorVersion() >= 16:  # write a utf-8 file
            f = codecs.open(outfile, "wb", encoding="utf_8_sig")
        else:
            f = open(outfile, "w")
        # The with-block closes the file even when a write fails.
        with f:
            if wantmacro:
                f.write("DEFINE %s ()\n" % macroname)
            f.writelines([t + "\n" for t in tw])
            if wantmacro:
                f.write("!ENDDEFINE.\n")

    return variables