def GetWeightSum(varlist=None):
    """Return the sum of the case weights as calculated by DESCRIPTIVES.

    varlist is an optional list of variable names; passing it causes cases
    to be listwise deleted over those variables.  When it is omitted, the
    current weight variable is used.
    If weights are not on, the return value is the unweighted number of cases.

    The result is the last number found in the last category of the
    "Descriptive Statistics" pivot table, converted to float.
    """

    if varlist is None:
        wtvar = spss.GetWeightVar()
        # GetWeightVar() yields None when weighting is off; a [None] list would
        # be truthy and later break the " ".join() call, so use an empty list
        # to trigger the scratch-variable fallback below.
        varlist = [wtvar] if wtvar is not None else []
    if not varlist:
        # No variable to describe: create a temporary constant so DESCRIPTIVES
        # has something to count.  random.uniform (not random.random, which
        # takes no arguments) matches the naming used elsewhere in this file.
        varlist = ["V" + str(random.uniform(.1, 1))]
        spss.Submit("""TEMPORARY.
COMPUTE %s = 0.""" % varlist[0])
    tag, err = spssaux.CreateXMLOutput("DESCRIPTIVES %s /STATISTICS=MIN." %
                                       " ".join(varlist),
                                       omsid='Descriptives')
    try:
        stats = spss.EvaluateXPath(
            tag, "/",
            """//pivotTable[@subType="Descriptive Statistics"]//dimension/category[last()]/*//cell/@number"""
        )
    finally:
        # Always release the XML workspace handle, even if the query fails.
        spss.DeleteXPathHandle(tag)
    return float(stats[-1])
# Example no. 2
def _wk_tempname(prefix):
    """Return *prefix* followed by a random fraction, used as a scratch SPSS name."""
    return prefix + str(random.uniform(.1, 1))


def _wk_open_cursor():
    """Open a read cursor, asking for non-binary mode when the call accepts it."""
    try:
        return spss.Cursor(isBinary=False)
    except Exception:
        # Presumably an spss module without the isBinary keyword; fall back
        # to the default cursor exactly as the original code did.
        return spss.Cursor()


def _wk_warning_table(text):
    """Emit a one-cell "Warnings" pivot table under the Weighted Kappa procedure."""
    spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
    try:
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        table[(CellText.String("1"), )] = CellText.String(text)
    finally:
        # Leave procedure state even if building the table fails, otherwise
        # later spss.Submit calls cannot run.
        spss.EndProcedure()


def _wk_quiet_warning(text, omstag):
    """Emit a warning table while an OMS request hides the procedure's Notes table."""
    spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag)
    try:
        _wk_warning_table(text)
    finally:
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag)


def weightedkappaextension(variables, wttype=1, cilevel=95):
    """Compute a weighted kappa for two raters and show it as a pivot table.

    Parameters:
        variables: variable specification, expanded with expandvarnames();
            it must resolve to exactly two existing variables.
        wttype: 1 for linear weights, 2 for quadratic weights.  Any other
            value is reset to 1 with a warning.
        cilevel: confidence level in percent for the asymptotic interval.
            Values outside 50..99.999 are clamped with a warning.

    Returns None.  Results and warnings are written to the SPSS Viewer.

    The work is done on a hidden copy of the active dataset: the current
    filter is applied as a selection, split-file status is ignored, and
    cases with missing values on either variable are dropped.  The command
    stops with a warning when fewer than two complete cases remain, when a
    rater has zero variance, or when ratings are not integers >= 1.
    Settings are wrapped in PRESERVE/RESTORE and scratch datasets/files are
    removed on a best-effort basis.
    """

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _wk_warning_table(
            _("""An invalid variable has been specified. This command is not executed."""
              ))
        return
    if len(varnames) != 2:
        _wk_warning_table(
            _("""Exactly two variables must be specified. This command is not executed."""
              ))
        return
    # Built only after validation: indexing varnames[1] earlier raised
    # IndexError for a single variable and hid the warning above.
    caption = varnames[0] + _(" vs. ") + varnames[1]

    try:
        warntext = []
        if cilevel < 50:
            warntext.append(
                _("CILEVEL cannot be less than 50%. It has been set to 50%."
                  ))
            cilevel = 50
        if cilevel > 99.999:
            warntext.append(
                _("CILEVEL cannot be greater than 99.999%. It has been set to 99.999%."
                  ))
            cilevel = 99.999
        if cilevel == int(cilevel):
            # Show whole-number levels without a trailing ".0" in the labels.
            cilevel = int(cilevel)
        if wttype not in (1, 2):
            warntext.append(
                _("WTTYPE must be 1 or 2. It has been set to 1."))
            wttype = 1
        varlist = varnames[0] + ' ' + varnames[1]
        spss.Submit("PRESERVE.")
        tempdir = tempfile.gettempdir()
        # Scratch files below are named without a path, so work in the temp dir.
        spss.Submit("""CD "%s".""" % tempdir)
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            # NOTE(review): this rounds the weight variable in the user's
            # active dataset (it runs before the working copy is made) --
            # confirm that this side effect is intended.
            spss.Submit(r"""
COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
            spss.Submit(r"""
EXECUTE.""")
        maxloops = 2 * spss.GetCaseCount()
        spss.Submit("""SET PRINTBACK=OFF MPRINT=OFF MXLOOPS=%s.""" %
                    maxloops)
        activeds = spss.ActiveDataset()
        if activeds == "*":
            # An unnamed dataset cannot be re-activated later; give it a name.
            activeds = _wk_tempname("D")
            spss.Submit("DATASET NAME %s" % activeds)
        tmpvar1 = _wk_tempname("V")
        tmpvar2 = _wk_tempname("V")
        tmpvar3 = _wk_tempname("V")
        tmpvar4 = _wk_tempname("V")
        tmpvar5 = _wk_tempname("V")
        tmpvar6 = _wk_tempname("V")
        tmpdata1 = _wk_tempname("D")
        tmpdata2 = _wk_tempname("D")
        omstag1 = _wk_tempname("T")
        omstag2 = _wk_tempname("T")
        omstag3 = _wk_tempname("T")
        omstag4 = _wk_tempname("T")
        omstag5 = _wk_tempname("T")
        omstag6 = _wk_tempname("T")
        tmpfile1 = _wk_tempname("F")
        tmpfile2 = _wk_tempname("F")
        lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
        upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel
        spss.Submit(r"""
DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)
        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            # NOTE(review): str.strip() removes any of the characters
            # "(FILTER)" from both ends, not the literal suffix, so a filter
            # variable whose name starts with an uppercase F/I/L/T/E/R would
            # be truncated.  Left unchanged pending confirmation of the exact
            # SHOW FILTER text.
            filtcond = filt.strip("(FILTER)")
            select = "SELECT IF " + str(filtcond) + "."
            spss.Submit("""%s""" % select)
            spss.Submit("""EXECUTE.""")
            spss.Submit("""USE ALL.""")
        splitinfo = spssaux.getDatasetInfo(Info="SplitFile")
        if splitinfo != "":
            warntext.append(_("This procedure ignores split file status."))
            spss.Submit(r"""SPLIT FILE OFF.""")
        # Keep only cases that are non-missing on both rating variables.
        spss.Submit(r"""
COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
        spss.Submit(r"""
SELECT IF %s=0.""" % tmpvar1)
        spss.Submit(r"""
EXECUTE.""")
        validn = spss.GetCaseCount()
        if validn < 2:
            _wk_quiet_warning(
                _("""There are too few complete cases. This command is not executed."""
                  ), omstag1)
            return

        # Append each rater's standard deviation to every case so the first
        # row is enough to detect a rater with constant ratings.
        spss.Submit(r"""
AGGREGATE
   /OUTFILE=* MODE=ADDVARIABLES
   /%s=SD(%s)
   /%s=SD(%s).""" % (tmpvar2, varnames[0], tmpvar3, varnames[1]))
        cur = _wk_open_cursor()
        try:
            datarow = cur.fetchone()
        finally:
            cur.close()
        sd1 = datarow[-2]
        sd2 = datarow[-1]
        if min(sd1, sd2) == 0:
            _wk_quiet_warning(
                _("""All ratings are the same for at least one rater. This command is not executed."""
                  ), omstag1)
            return

        if warntext:
            # One table holding every accumulated warning, one per line
            # (at most three warnings can accumulate above).
            _wk_quiet_warning(" \n".join(warntext), omstag1)
        spss.Submit(r"""
DELETE VARIABLES %s %s.""" % (tmpvar2, tmpvar3))
        # Cell counts of the rater-by-rater cross-classification.
        spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile1, varlist, tmpvar4))
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Variables to Cases'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag2)
        # Stack both raters into one column to collect the rating categories.
        spss.Submit(r"""
VARSTOCASES
  /MAKE %s FROM %s.""" % (tmpvar5, varlist))
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag2)
        catdata = []
        cur = _wk_open_cursor()
        try:
            while True:
                datarow = cur.fetchone()
                if datarow is None:
                    break
                catdata.append(datarow[-1])
        finally:
            cur.close()
        cats = list(set(catdata))
        cattest = 0
        if any(item != round(item) for item in cats):
            cattest = 1
            _wk_quiet_warning(
                _("""Some ratings are not integers. This command is not executed."""
                  ), omstag1)
            return
        if min(cats) < 1.0:
            _wk_quiet_warning(
                _("""Some ratings are less than 1. This command is not executed."""
                  ), omstag1)
            return

        spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile2, tmpvar5, tmpvar6))
        spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN""" % tmpdata2)
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Matrix'] 
 /DESTINATION VIEWER=NO
 /TAG='"%s"'.""" % omstag3)
        # MATRIX builds the square contingency table y, the agreement
        # weights wt, and from them kappa with its standard errors; a value
        # of -1 marks a statistic that could not be computed.
        spss.Submit(r"""
MATRIX.
GET x 
  /FILE=%s
  /VARIABLES=%s %s.
GET ratecats
  /FILE=%s
  /VARIABLES=%s.
COMPUTE size=MMAX(ratecats).
COMPUTE y=MAKE(size,size,0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+   LOOP k=1 to NROW(x).
+     DO IF (x(k,1)=i and x(k,2)=j).
+       COMPUTE y(i,j)=x(k,3).
+     END IF.
+   END LOOP.
+ END LOOP.
END LOOP.
COMPUTE wttype=%s.
COMPUTE wt=MAKE(NROW(y),NCOL(y),0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+   DO IF wttype=1.
+     COMPUTE wt(i,j)=1-(ABS(i-j)/(size-1)).
+   ELSE IF wttype=2.
+     COMPUTE wt(i,j)=1-((i-j)/(NROW(y)-1))**2.
+   END IF.
+ END LOOP.
END LOOP.
COMPUTE n=MSUM(y).
COMPUTE prop=y/n.
COMPUTE p_i=RSUM(prop).
COMPUTE p_j=CSUM(prop).
COMPUTE w_i=(wt*T(p_j))*MAKE(1,size,1).
COMPUTE w_j=MAKE(size,1,1)*(T(p_i)*wt).
COMPUTE po=MSUM(wt&*prop).
COMPUTE pe=MSUM(MDIAG(p_i)*wt*MDIAG(p_j)).
COMPUTE kstat=(po-pe)/(1-pe).
COMPUTE var0=(T(p_i)*((wt-(w_i+w_j))&**2)*T(p_j)-pe**2)/(n*(1-pe)**2).
DO IF var0>=0.
+ COMPUTE ase0=SQRT(var0).
ELSE.
+ COMPUTE ase0=-1.
END IF.
DO IF ase0>0.
+ COMPUTE z=kstat/ase0.
+ COMPUTE sig=1-CHICDF(z**2,1).
ELSE.
+ COMPUTE z=-1.
+ COMPUTE sig=-1.
END IF.
COMPUTE var1=(MSUM((prop&*((wt-(w_i+w_j)&*(1-kstat))&**2)))-(kstat-pe*(1-kstat))**2)/(n*(1-pe)**2).
DO IF var1>=0.
+ COMPUTE ase1=SQRT(var1).
ELSE.
+ COMPUTE ase1=-1.
END IF.
SAVE {wttype,kstat,ase1,z,sig,ase0}
   /OUTFILE=%s
   /VARIABLES=wttype,kstat,ase1,z,sig,ase0.
END MATRIX.""" % (tmpfile1, varlist, tmpvar4, tmpfile2, tmpvar5, wttype,
                  tmpdata2))
        spss.Submit(r"""
OMSEND TAG=['"%s"'].""" % omstag3)
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata2)
        # Turn the -1 markers into system-missing and add the CI bounds.
        spss.Submit(r"""
DO IF ase0=-1.
+ RECODE z sig (-1=SYSMIS).
END IF.
EXECUTE.
DELETE VARIABLES ase0.
RECODE ase1 (-1=SYSMIS).
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase1.""" % cilevel)
        spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase1.""" % cilevel)
        # Variable labels double as the row/column labels of the result table.
        spss.Submit(r"""
FORMATS kstat ase1 z sig lower upper (F11.3).
VARIABLE LABELS kstat %s.""" % _smartquote(_("""Kappa""")))
        spss.Submit(r"""
VARIABLE LABELS ase1 %s.""" % _smartquote(_("""Asymptotic Standard Error""")))
        spss.Submit(r"""
VARIABLE LABELS z %s.""" % _smartquote(_("""Z""")))
        spss.Submit(r"""
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
        spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
        spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
        if wttype == 1:
            spss.Submit(r"""
VARIABLE LABELS wttype %s.""" % _smartquote(_("""Linear""")))
        elif wttype == 2:
            spss.Submit(r"""
VARIABLE LABELS wttype %s.""" % _smartquote(_("""Quadratic""")))
        spss.Submit(r"""
EXECUTE.
""")
        # Hide the Notes table and "Active Dataset" text of the result
        # procedure; these two requests are ended in the cleanup below.
        spss.Submit(r"""
OMS
  /SELECT TABLES 
  /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
  /DESTINATION VIEWER=NO
  /TAG = '"%s"'.""" % omstag4)
        spss.Submit(r"""
OMS
  /SELECT TEXTS
  /IF COMMANDS=['Weighted Kappa'] LABELS=['Active Dataset']
  /DESTINATION VIEWER=NO
  /TAG = '"%s"'.""" % omstag5)
        if warntext:
            # A heading was already produced by the warnings table; suppress
            # the second one.
            spss.Submit(r"""
OMS
 /SELECT HEADINGS
 /IF COMMANDS=['Weighted Kappa']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag6)
        cur = _wk_open_cursor()
        try:
            data = cur.fetchone()
        finally:
            cur.close()
        spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
        try:
            table = spss.BasePivotTable(_("Weighted Kappa"),
                                        "Kappa",
                                        caption=caption)
            # Variable 0 is wttype (row label); variables 1-6 are the
            # statistics shown as columns.
            table.SimplePivotTable(
                rowdim=_("Weighting"),
                rowlabels=[CellText.String(spss.GetVariableLabel(0))],
                coldim="",
                collabels=[spss.GetVariableLabel(i) for i in range(1, 7)],
                cells=[data[i] for i in range(1, 7)])
        finally:
            spss.EndProcedure()
        if warntext:
            spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag6)
    finally:
        # Best-effort cleanup.  Which resources exist depends on how far the
        # run got; names that were never bound raise NameError here, and that
        # (like any SPSS error) is deliberately swallowed so RESTORE always
        # runs.
        try:
            spss.Submit(r"""
DATASET CLOSE %s.""" % tmpdata1)
            spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % activeds)
            if validn >= 2 and min(sd1, sd2) > 0:
                if cattest == 0 and min(cats) >= 1:
                    # Only the full computation path created these.
                    spss.Submit(r"""
OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
                    spss.Submit(r"""
DATASET CLOSE %s.""" % tmpdata2)
                    spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile2)
                spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile1)
        except Exception:
            pass
        spss.Submit(r"""
RESTORE.
""")
def fleisskappaextension(variables, cilevel=95):

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(
            _("""An invalid variable has been specified. This command is not executed."""
              ))
        spss.EndProcedure()
    elif len(varnames) < 2:
        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(
            _("""At least two variables must be specified. This command is not executed."""
              ))
        spss.EndProcedure()

    else:
        try:
            warntext = []
            if cilevel < 50:
                warntext.append(
                    _("CILEVEL cannot be less than 50%. It has been reset to 50%."
                      ))
                cilevel = 50
            if cilevel > 99.999:
                warntext.append(
                    _("CILEVEL cannot be greater than 99.999%. It has been reset to 99.999%."
                      ))
                cilevel = 99.999
            if cilevel == int(cilevel):
                cilevel = int(cilevel)
            varlist = varnames[0]
            for i in range(1, len(varnames)):
                varlist = varlist + ' ' + varnames[i]
            spss.Submit("PRESERVE.")
            tempdir = tempfile.gettempdir()
            spss.Submit("""CD "%s".""" % tempdir)
            wtvar = spss.GetWeightVar()
            if wtvar != None:
                spss.Submit(r"""
COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
                spss.Submit(r"""
EXECUTE.""")
                wtdn = GetWeightSum(varnames)
            else:
                wtdn = spss.GetCaseCount()
            maxloops = wtdn + 1
            spss.Submit(
                """SET PRINTBACK=OFF MPRINT=OFF OATTRS=ENG MXLOOPS=%s.""" %
                maxloops)
            activeds = spss.ActiveDataset()
            if activeds == "*":
                activeds = "D" + str(random.uniform(.1, 1))
                spss.Submit("DATASET NAME %s" % activeds)
            tmpvar1 = "V" + str(random.uniform(.1, 1))
            tmpvar2 = "V" + str(random.uniform(.1, 1))
            tmpvar3 = "V" + str(random.uniform(.1, 1))
            tmpfile1 = "F" + str(random.uniform(.1, 1))
            tmpfile2 = "F" + str(random.uniform(.1, 1))
            tmpdata1 = "D" + str(random.uniform(.1, 1))
            tmpdata2 = "D" + str(random.uniform(.1, 1))
            tmpdata3 = "D" + str(random.uniform(.1, 1))
            omstag1 = "T" + str(random.uniform(.1, 1))
            omstag2 = "T" + str(random.uniform(.1, 1))
            omstag3 = "T" + str(random.uniform(.1, 1))
            omstag4 = "T" + str(random.uniform(.1, 1))
            omstag5 = "T" + str(random.uniform(.1, 1))
            omstag6 = "T" + str(random.uniform(.1, 1))
            lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
            upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel
            spss.Submit(r"""
DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
            spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)
            filt = spssaux.GetSHOW("FILTER", olang="english")
            if filt != "No case filter is in effect":
                filtcond = filt.strip("(FILTER)")
                select = "SELECT IF " + str(filtcond) + "."
                spss.Submit("""%s""" % select)
                spss.Submit("""EXECUTE.""")
                spss.Submit("""USE ALL.""")
            banana = spssaux.getDatasetInfo(Info="SplitFile")
            if banana != "":
                warntext.append(_("This command ignores split file status."))
                spss.Submit(r"""SPLIT FILE OFF.""")
            spss.Submit(r"""
COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
            spss.Submit(r"""
SELECT IF %s=0.""" % tmpvar1)
            spss.Submit(r"""
EXECUTE.
MISSING VALUES ALL ().""")
            validn = spss.GetCaseCount()
            if wtvar == None:
                spss.Submit(r"""
SAVE OUTFILE=%s.""" % tmpfile1)
            else:
                spss.Submit(r"""
DO IF %s >= 1.""" % wtvar)
                spss.Submit(r"""
+ LOOP #i=1 TO %s.""" % wtvar)
                spss.Submit(r"""
XSAVE OUTFILE=%s
  /KEEP=%s
  /DROP=%s.""" % (tmpfile1, varlist, wtvar))
                spss.Submit(r"""
+ END LOOP.
END IF.
EXECUTE.
""")
            spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Variables to Cases'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag1)
            spss.Submit(r"""
VARSTOCASES
  /MAKE %s FROM %s.""" % (tmpvar2, varlist))
            spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag1)
            catdata = []
            try:
                cur = spss.Cursor(isBinary=False)
            except:
                cur = spss.Cursor()
            while True:
                datarow = cur.fetchone()
                if datarow is None:
                    break
                catdata.append(datarow[-1])
            cur.close()
            cats = list(set(catdata))
            ncats = len(cats)
            nraters = len(varnames)
            neededn = max(ncats, nraters)
            if validn < neededn:
                spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag2)
                spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
                table = spss.BasePivotTable("Warnings ", "Warnings")
                table.Append(spss.Dimension.Place.row,
                             "rowdim",
                             hideLabels=True)
                rowLabel = CellText.String("1")
                table[(rowLabel, )] = CellText.String(
                    _("""There are too few complete cases. This command is not executed."""
                      ))
                spss.EndProcedure()
                spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag2)
            elif ncats < 2:
                spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag2)
                spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
                table = spss.BasePivotTable("Warnings ", "Warnings")
                table.Append(spss.Dimension.Place.row,
                             "rowdim",
                             hideLabels=True)
                rowLabel = CellText.String("1")
                table[(rowLabel, )] = CellText.String(
                    _("""All ratings are the same. This command is not executed."""
                      ))
                spss.EndProcedure()
                spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag2)
            else:
                if len(warntext) > 0:
                    spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag2)
                    if len(warntext) == 1:
                        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
                        table = spss.BasePivotTable("Warnings ", "Warnings")
                        table.Append(spss.Dimension.Place.row,
                                     "rowdim",
                                     hideLabels=True)
                        rowLabel = CellText.String("1")
                        table[(rowLabel, )] = CellText.String("%s" %
                                                              warntext[0])
                        spss.EndProcedure()
                    if len(warntext) == 2:
                        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
                        table = spss.BasePivotTable("Warnings ", "Warnings")
                        table.Append(spss.Dimension.Place.row,
                                     "rowdim",
                                     hideLabels=True)
                        rowLabel = CellText.String("1")
                        table[(rowLabel, )] = CellText.String(
                            "%s \n"
                            "%s" % (warntext[0], warntext[1]))
                        spss.EndProcedure()
                    spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag2)
                spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile2, tmpvar2, tmpvar3))
                spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata2)
                spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata3)
                spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Matrix'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag3)
                spss.Submit(r"""
MATRIX.
GET x 
  /FILE=%s
  /VARIABLES=%s.
GET ratecats
  /FILE=%s
  /VARIABLES=%s.
COMPUTE n=NROW(x).
COMPUTE c=NROW(ratecats).
COMPUTE y=MAKE(n,c,0).
LOOP i=1 to n.
+ LOOP j=1 to NCOL(x).
+   LOOP k=1 to c.
+     DO IF x(i,j)=ratecats(k).
+       COMPUTE y(i,k)=y(i,k)+1.
+     END IF.
+   END LOOP.
+ END LOOP.
END LOOP.
COMPUTE k=NCOL(x).
COMPUTE pe=MSUM((CSUM(y)/MSUM(y))&**2).
COMPUTE pa=MSSQ(y)/(NROW(y)*k*(k-1))-(1/(k-1)).
COMPUTE kstat=(pa-pe)/(1-pe).
COMPUTE cp=(CSSQ(y)-CSUM(y))&/((k-1)&*CSUM(y)).
COMPUTE pj=CSUM(y)/MSUM(y).
COMPUTE one=MAKE(1,NCOL(pj),1).
COMPUTE qj=one-pj.
COMPUTE kj=(cp-pj)&/qj.
COMPUTE num=2*((pj*t(qj))**2-MSUM(pj&*qj&*(qj-pj))).
COMPUTE den=n*k*(k-1)*((pj*t(qj))**2).
COMPUTE ase=SQRT(num/den).
COMPUTE z=kstat/ase.
COMPUTE sig=1-CHICDF(z**2,1).
SAVE {kstat,ase,z,sig}
   /OUTFILE=%s
   /VARIABLES=kstat,ase,z,sig.
COMPUTE asej=MAKE(1,c,SQRT(2/(n*k*(k-1)))).
COMPUTE zj=kj&/asej.
COMPUTE sigj=one-CHICDF(zj&**2,1).
SAVE {ratecats,t(cp),t(kj),t(asej),t(zj),t(sigj)}
  /OUTFILE=%s
  /VARIABLES=category,cp,kstat,ase,z,sig.
END MATRIX.""" % (tmpfile1, varlist, tmpfile2, tmpvar2, tmpdata2, tmpdata3))
                spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag3)
                spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata2)
                spss.Submit(r"""
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
                spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
                spss.Submit(r"""
FORMATS kstat ase z sig lower upper (F11.3).
VARIABLE LABELS kstat %s. """ % _smartquote(_("""Kappa""")))
                spss.Submit(r"""
VARIABLE LABELS ase %s. """ % _smartquote(_("""Asymptotic Standard Error""")))
                spss.Submit(r"""
VARIABLE LABELS z %s. """ % _smartquote(_("""Z""")))
                spss.Submit(r"""
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
                spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
                spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
                spss.Submit(r"""
EXECUTE.
""")
                try:
                    cur = spss.Cursor(isBinary=False)
                except:
                    cur = spss.Cursor()
                data1 = cur.fetchone()
                cur.close()
                collabels1=[spss.GetVariableLabel(0),spss.GetVariableLabel(1),spss.GetVariableLabel(2),spss.GetVariableLabel(3), \
                                         spss.GetVariableLabel(4),spss.GetVariableLabel(5)]
                celldata1 = [
                    data1[0], data1[1], data1[2], data1[3], data1[4], data1[5]
                ]
                spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata3)
                spss.Submit(r"""
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
                spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
                spss.Submit(r"""
FORMATS category (F10.0) cp kstat ase z sig lower upper (F11.3).
VARIABLE LABELS category %s. """ % _smartquote(_("""Rating Category""")))
                spss.Submit(r"""
VARIABLE LABELS cp %s. """ % _smartquote(_("""Conditional Probability""")))
                spss.Submit(r"""
VARIABLE LABELS kstat %s. """ % _smartquote(_("""Kappa""")))
                spss.Submit(r"""
VARIABLE LABELS ase %s. """ % _smartquote(_("""Asymptotic Standard Error""")))
                spss.Submit(r"""                
VARIABLE LABELS z %s. """ % _smartquote(_("""Z""")))
                spss.Submit(r""" 
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
                spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
                spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
                spss.Submit(r""" 
EXECUTE.""")
                spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag4)
                spss.Submit(r"""
OMS
 /SELECT TEXTS
 /IF COMMANDS=['Fleiss Kappa'] LABELS=['Active Dataset']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag5)
                if len(warntext) > 0:
                    spss.Submit(r"""
OMS
 /SELECT HEADINGS
 /IF COMMANDS=['Fleiss Kappa']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag6)
                n = spss.GetCaseCount
                rlabels = []
                data2 = []
                try:
                    cur = spss.Cursor(isBinary=False)
                except:
                    cur = spss.Cursor()
                for i in range(0, spss.GetCaseCount()):
                    datarow = cur.fetchone()
                    data2.append(datarow[1:])
                    rlabels.append(datarow[0])
                cur.close()

                def _flatten(seq):
                    for item in seq:
                        if spssaux._isseq(item):
                            for subitem in _flatten(item):
                                yield subitem
                        else:
                            yield item

                data2 = [item for item in _flatten(data2)]
                spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
                table1 = spss.BasePivotTable(_("Overall Kappa"),
                                             "Overall Kappa")
                table1.SimplePivotTable(rowdim=_(""),
                                        rowlabels=[CellText.String("Overall")],
                                        coldim="",
                                        collabels=collabels1,
                                        cells=celldata1)
                if any(item != round(item) for item in rlabels):
                    caption = (_(
                        "Non-integer rating category values are truncated for presentation."
                    ))
                else:
                    caption = ("")
                table2 = spss.BasePivotTable(
                    _("Kappas for Individual Categories"),
                    _("Individual Category Kappa Statistics"),
                    caption=caption)
                rowlabels = [(CellText.String("{:>9.0f}".format(rlabels[i])))
                             for i in range(len(rlabels))]
                collabels=[spss.GetVariableLabel(1),spss.GetVariableLabel(2),spss.GetVariableLabel(3), \
                      spss.GetVariableLabel(4),spss.GetVariableLabel(5),spss.GetVariableLabel(6), \
                      spss.GetVariableLabel(7)]
                table2.SimplePivotTable(rowdim=_("  Rating Category"),
                                        rowlabels=rowlabels,
                                        coldim="",
                                        collabels=collabels,
                                        cells=data2)
                spss.EndProcedure()
                if len(warntext) > 0:
                    spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag6)
        finally:
            try:
                spss.Submit("""
DATASET CLOSE %s.""" % tmpdata1)
                spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % activeds)
                if validn >= neededn:
                    if ncats >= 2:
                        spss.Submit("""
OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
                        spss.Submit("""
DATASET CLOSE %s.""" % tmpdata2)
                        spss.Submit("""
DATASET CLOSE %s.""" % tmpdata3)
                        spss.Submit("""
ERASE FILE=%s.""" % tmpfile1)
                        spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile2)
            except:
                pass
            spss.Submit("""
RESTORE.
""")
Esempio n. 4
0
def rake(info, variables, marginals, finalweight, visible=False, showweights=True,
        poptotal=None, delta=0, iter=20, conv=.0001, checkempty=True,
        yvar=None, xvar=None, paneldownvar=None, panelacrossvar=None, autoheatmap=None, histogram=True):
    """Calculate a weight variable such that for each controlled dimension, the (weighted) count in each category matches a specified total or fraction.

    info is an object providing addrow(text) and generate(); it is used to report control values
    for which no marginal was supplied.
    variables is a list of the variables for which control totals or proportions are provided.  It can be a sequence or
    a white-space separated string.
    marginals is a list of dictionaries where the key is the value of a variable and the value is the target control total or fraction.
    Fractional marginals should normally add to 1 and counts should total the same in each dimension, but this is not enforced.
    If there are no cases for a given value, the total or fraction will be less than expected.
    Negative and zero marginals are tolerated but are generally inappropriate.
    If a control value is not in the appropriate marginals dictionary or is zero, the resulting weight will be SYSMIS.

    finalweight is a string naming the new weight variable.  The variable must not already exist in the active dataset.
    visible indicates whether or not the procedure output and auxiliary dataset are displayed.
    showweights indicates whether the table of raked weights per cell is produced.
    poptotal is a total to which the final weights will be scaled.  By default, they are scaled to sum to the existing weight total, if any, or the number of cases.
    delta, iter, and conv are iteration parameters corresponding to the GENLOG parameters and can be used if there are convergence problems.
    checkempty adjusts for empty cells.  If it is known that there are none, this adjustment, which can use considerable memory, can be bypassed.
    yvar, xvar, paneldownvar, panelacrossvar and autoheatmap are passed through to doheatmap.
    histogram indicates whether dohistogram is called for the final weight.
    If the active dataset does not have a name, one is assigned automatically.

    Raises ValueError if the number of variables differs from the number of marginals dictionaries
    or if the GENLOG command fails.
    On success the active dataset is left weighted by finalweight."""

    variables = _buildvarlist(variables)
    if len(variables) == 1:   # delta should always be 0 if this is a 1-d problem
        delta = 0.
    wtvar = spss.GetWeightVar()
    # aggrweight will be the GENLOG CSTRUCTURE variable
    aggrweight = ("W_" if wtvar else "N_") + rname()
    activeds = spssaux.GetActiveDatasetName()
    if not activeds:
        activeds = "D_" + rname()
        spss.Submit("DATASET NAME " + activeds)
    nbreakvars = len(variables)
    if nbreakvars != len(marginals):
        raise ValueError(_("The number of control variables does not match number of sets of control totals"))

    # aggregate the data according to the list of control variables
    aggrdsname = "D_" + rname()
    countname = "N_" + rname()
    spss.Submit("WEIGHT OFF.")
    # aggregate to a new dataset and activate it.  Sum original weight variable if any
    cmd=\
       """DATASET DECLARE  %(aggrdsname)s %(vis)s.
    AGGREGATE 
     /OUTFILE= %(aggrdsname)s
     /BREAK=%(breakvars)s
    %(wtspec)s 
    /%(countname)s=N.
   dataset activate %(aggrdsname)s.""" % \
                                       {'aggrdsname': aggrdsname,
                                        'breakvars' : " ".join(variables),
                                        'wtspec' : "/" + aggrweight + (("= SUM(" + wtvar + ")") if wtvar else "= N"),
                                        'countname' : countname,
                                        'vis' : " " if visible else " WINDOW=HIDDEN"
                                        }
    spss.Submit(cmd)
    # if weighting, make a dictionary of the mean weight in each cell for use in final adjustment pass
    if wtvar:
        allaggrdata = spssdata.Spssdata(variables + [aggrweight] + [countname]).fetchall()
        meaninputwts = {}
        for row in allaggrdata:
            try:
                themean = row[nbreakvars] / row[nbreakvars+1]
            except (TypeError, ZeroDivisionError):
                # missing weight sum or zero count: no usable mean for this cell
                themean = None
            meaninputwts[row[:nbreakvars]] = themean

    # get a cursor and add control totals to this dataset
    # The control total is the product of all the variable value control totals or proportions
    # Track the cases to see if there are any empty cells, i.e., some combination of the marginals across
    # all the control variables that does not occur in the aggregated dataset.
    spss.StartProcedure("SPSSINC RAKE - I")
    if checkempty:
        cellset = _setprod(_dictlisttotupledsets(marginals))   # build set of tuples of all sets
        curs = spssdata.Spssdata(accessType='r', indexes=variables, names=False)
        for case in curs:
            cellset.discard(tuple(case[:nbreakvars]))  # remove found cells
        curs.CClose()
        spss.EndProcedure()
        #add cases for any empty cells
        if cellset:
            spss.StartProcedure("SPSSINC RAKE - I")
            curs = spssdata.Spssdata(accessType='a', names=False)
            for s in cellset:
                for cv in range(nbreakvars):
                    curs.appendvalue(cv, s[cv])
                # near-zero placeholder for the two aggregate columns that follow the break variables
                for cv in range(2):  #was nbreakvars
                    curs.appendvalue(nbreakvars+cv, 1e-12)   # 1e-8
                curs.CommitCase()
            curs.CClose()
            spss.EndProcedure()

    # compute expected count from marginals
    try:
        curs = spssdata.Spssdata(accessType='w')
        ctrlwt = "W_"+ rname()  # this will be the SPSS case weight
        curs.append(ctrlwt)
        curs.commitdict()
        novalues = set()   # (variable, value) pairs already reported
        for case in curs:
            w = 1.
            for i, v in enumerate(marginals):
                w = w* v.get(case[i], 0)
                if w == 0:
                    spec = (variables[i], case[i])
                    if spec not in novalues:
                        uspec = spec[0]
                        if not isinstance(uspec, str):
                            uspec = str(uspec, locale.getlocale()[1])
                        info.addrow(_("Variable: %s, value: %s. No control value supplied: weight will be SYSMIS.") % (uspec,  spec[1]))
                        novalues.add(spec)
                    break
            curs.casevalues([w])
    finally:
        curs.CClose()
        spss.EndProcedure()
        info.generate()

    # run GENLOG
    expectedname = 'expected_' + rname()
    if not visible:
        omstag = "O_" + rname()
        spss.Submit("OMS /SELECT ALL EXCEPT =WARNINGS /DESTINATION VIEWER=NO /TAG=" + omstag)

    try:
        spss.Submit("""WEIGHT BY %(ctrlwt)s.
            GENLOG
            %(breakvars)s  /CSTRUCTURE = %(aggrweight)s
            /MODEL = POISSON
            /PRINT = FREQ ESTIM
            /plot none
            /CRITERIA = CIN(95) ITERATE(%(iter)s) CONVERGE(%(conv)s) DELTA(%(delta)s)
           /save= pred(%(expectedname)s)
            /DESIGN %(breakvars)s .  """ %\
                                         {'ctrlwt' : ctrlwt,
                                          'breakvars': " ".join(variables),
                                          'aggrweight' : aggrweight ,
                                          'expectedname' : expectedname,
                                          'iter' : iter,
                                          'conv' : conv,
                                          'delta' : delta
                                          })
    except Exception as err:
        # go back to the user's dataset before reporting the failure
        spss.Submit("DATASET ACTIVATE " + activeds)
        raise ValueError(_("""Failure in GENLOG procedure.  Processing stopped.
        The error could be either a failure to compute the result 
        or not having a license for the Advanced Statistics option""")) from err
    finally:
        if not visible:
            spss.Submit("OMSEND TAG=" + omstag)
    # get the expected counts, normalized by the cell N in order to distribute
    expkts = {}
    spss.StartProcedure("SPSSINC RAKE - II")

    try:
        # trailing columns of each case: [-3] expected count, [-2] aggregated weight, [-1] cell N
        curs = spssdata.Spssdata(accessType='r', indexes = variables + [expectedname] + [aggrweight]+ [countname])
        weightsum = 0.
        wsum = 0.
        for case in curs:
            weightsum += case[nbreakvars] or 0  # allow for missing values j.i.c
            if case[-3]:
                wsum += case[-2]
                w = case[-3]/case[-1]
            else:
                w = None
            expkts[tuple(case[:nbreakvars])] = w
    finally:
        curs.CClose()
        spss.EndProcedure()
    # normalize weights to user total or sum of sample weights
    poptotal = poptotal or wsum
    for key in expkts:
        if expkts[key]:
            expkts[key] *= poptotal/weightsum

    # return to the first dataset and apply weights.
    spss.Submit("DATASET ACTIVATE " + activeds)
    if not visible:
        spss.Submit("DATASET CLOSE " + aggrdsname)
    spss.StartProcedure("SPSSINC RAKE - III")
    if wtvar:
        indexes = variables+ [wtvar]
    else:
        indexes = variables
    curs = spssdata.Spssdata(accessType='w', indexes = indexes)
    try:
        curs.append(spssdata.vdef(finalweight, vlabel=_("Raked Weight")))
        curs.commitdict()
        wirisum = 0.
        wiri2sum = 0.
        wisum = 0.
        actuals = {}   # cell -> [raked weight, (weighted) case count] actually used
        actualsNewwt = {}   # cell -> [sum of final weights, sum of input weights]

        for case in curs:
            index = case[:nbreakvars]
            rwt = expkts.get(index, None)   # raked weight
            if wtvar:
                wt = case[-1]
            else:
                wt = 1.
            # for weighted data, adjust cell weights by input case weight normalized by cell mean weight
            try:
                if wtvar:
                    newwt = rwt * wt / meaninputwts[index]
                else:
                    newwt = rwt
            except (TypeError, ZeroDivisionError, KeyError):
                # missing raked weight, input weight or cell mean: final weight is SYSMIS
                newwt = None
            curs.casevalues([newwt])
            try:
                kt = actuals.get(index, (0,0))[1] + wt
                actuals[index] = [rwt, kt]
            except TypeError:
                pass   # missing input weight: case does not contribute to the count
            if wtvar:
                try:
                    cumwt, cumkt = actualsNewwt.get(index, [0,0])
                    cumwt += newwt
                    cumkt += wt
                    actualsNewwt[index] = [cumwt, cumkt]
                except TypeError:
                    pass   # SYSMIS final weight: case does not contribute to the cell mean

            # Only cases that received a final weight enter the balance statistic.
            # Testing newwt (rather than rwt) also skips cases whose adjusted
            # weight could not be computed instead of failing on None arithmetic.
            if newwt is not None:
                wisum += wt
                wirisum += wt * newwt
                wiri2sum += wt * newwt * newwt

    except BaseException:
        curs.CClose()
        curs = None
        spss.EndProcedure()
        raise
    finally:
        if curs is not None:
            curs.CClose()
    denom = wisum * wiri2sum
    if denom != 0:
        sampleeff = 100. * wirisum * wirisum / denom
    else:
        sampleeff = None
    cells = [sampleeff]
    if wtvar:
        rowlabels = [_("""Sample Balance Including Final Weight Adjustment""")]
    else:
        rowlabels = [_("""Sample Balance""")]
    tbl = spss.BasePivotTable(_("""Sample Balance Based on Variables: %s""") % ", ".join(variables),
        "RAKEBALANCE")
    tbl.SimplePivotTable(rowlabels=rowlabels, collabels=[_("""Balance""")], cells=cells)

    # table of weights
    if showweights:
        collabels = [_("""Category Rake Weight""")]
        if wtvar:
            for k, act in actuals.items():
                # A cell has no accumulated entry when every final weight in it was
                # SYSMIS (e.g. no control value supplied).  Report a missing mean for
                # such cells, and for cells with a zero weighted count, instead of
                # failing with KeyError or ZeroDivisionError.
                cumwt, cumkt = actualsNewwt.get(k, (None, 0))
                act.append(cumwt / cumkt if cumkt else None)
            collabels.append(_("Case Count Weighted by Input Weight"))
            collabels.append(_("Mean Adjusted Raked Weight"))
        else:
            collabels.append(_("Unweighted Case Count"))
        items = sorted(actuals.items())
        rowlabels = [", ".join([str(v) for v in item[0]]) for item in items]
        cells = [item[-1] for item in items]

        tbl2 = spss.BasePivotTable(_("""Raked Weights"""), "RAKEDWEIGHTS")
        tbl2.SimplePivotTable(rowdim=", ".join(variables), rowlabels = rowlabels,
            collabels=collabels, cells=cells)
    spss.EndProcedure()
    if histogram:
        dohistogram(finalweight)
    doheatmap(variables, yvar, xvar, paneldownvar, panelacrossvar, finalweight, autoheatmap)
    spss.Submit("WEIGHT BY " + finalweight)
def plots(yvars,
          xvars,
          color=None,
          size=None,
          shape=None,
          label=None,
          linear=False,
          quadratic=False,
          cubic=False,
          loess=False,
          ignore=False,
          title="",
          categorical="bars",
          group=1,
          boxplots=False,
          hexbin=False,
          applyfitto="total",
          indent=15,
          yscale=75,
          pagex=None,
          pagey=None):
    """Create relationship plots of each y variable against the x variables.

    A legend-information pivot table is displayed first; then, for every y
    variable and every part produced by xgen(xvars, group), one graph command
    is assembled as a list of statements and submitted with spss.Submit.

    yvars -- names of the y variables; each must have a scale measurement level.
    xvars -- names of the x variables; split into chart groups by xgen.
    color, size, shape, label -- optional variable names used as aesthetics
        for scatterplots.  The shape variable must be categorical.
    linear, quadratic, cubic, loess -- request the corresponding fit lines.
    ignore -- not referenced in this function body.
    title -- appended to the legend table title; also used as the chart title
        when a chart holds a single x variable and boxplots is off.
    categorical -- element type for categorical x variables: "boxplot" uses
        the boxplot element, anything else is looked up in elementmap;
        "bars" additionally appends the include0 statement.
    group -- group size passed to xgen; values above 1 turn boxplots off.
    boxplots -- add border boxplots of x and y around a scale scatterplot.
    hexbin -- use the hexbin scatter element instead of the plain one.
    applyfitto -- "group" colors fit lines by the color variable; forced to
        "total" when there is no color variable or it is a scale variable.
    indent, yscale -- passed to scaling() when a chart has several panels.
    pagex, pagey -- page size; both or neither must be given.

    Raises ValueError when only one of pagex/pagey is given, when a y variable
    is not scale, or when the shape variable is scale.
    """

    # debugging
    # makes debug apply only to the current thread
    #try:
    #import wingdbstub
    #if wingdbstub.debugger != None:
    #import time
    #wingdbstub.debugger.StopDebug()
    #time.sleep(2)
    #wingdbstub.debugger.StartDebug()
    #import thread
    #wingdbstub.debugger.SetDebugThreads({thread.get_ident(): 1}, default_policy=0)
    ## for V19 use
    ##    ###SpssClient._heartBeat(False)
    #except:
    #pass

    npage = [pagex, pagey].count(None)  # 0 means both specified
    if npage == 1:
        raise ValueError(
            _("Page specification must include both x and y sizes"))
    if group > 1:
        boxplots = False
    spssweight = spss.GetWeightVar()
    # normalize any falsy "no weight" result to None so it is filtered out of
    # the aesthetics set below
    if not spssweight:
        spssweight = None

    vardict = spssaux.VariableDict()
    # display pivot table of legend information
    fits = []
    for i, fittype in enumerate([linear, quadratic, cubic, loess]):
        if fittype:
            fits.append(fittypetable[i])

    spss.StartProcedure("STATS REGRESS", _("Relationship Plots"))
    ttitle = _("Chart Legend Information")
    if title:
        ttitle = ttitle + "\n" + title
    tbl = spss.BasePivotTable(
        ttitle,
        "CHARTLEGENDINFO",
        caption=
        _("Legend Settings for the charts that follow.  Some settings do not apply to categorical charts."
          ))
    tbl.SimplePivotTable(_("Settings"),
                         rowlabels=[
                             _("Color by"),
                             _("Size by"),
                             _("Shape by"),
                             _("Label by"),
                             _("Fit Lines")
                         ],
                         collabels=[_("Value")],
                         cells=[
                             labelit(color, vardict) or "---",
                             labelit(size, vardict) or "---",
                             labelit(shape, vardict) or "---",
                             labelit(label, vardict) or "---", "\n".join(fits)
                             or "---"
                         ])
    spss.EndProcedure()

    # group fitlines only available for categorically defined groups
    if not color or (color and vardict[color].VariableLevel == "scale"):
        applyfitto = "total"

    # distinct variables, besides x and y, that the graph command must read
    aesthetics = set([
        item for item in [color, size, shape, label, spssweight]
        if not item is None
    ])

    for y in yvars:
        yobj = vardict[y]
        if yobj.VariableLevel != "scale":
            raise ValueError(
                _("Y variables must have a scale measurement level: %s") % y)
        yvarlabel = yobj.VariableLabel or y

        # construct one possibly multi-part chart for each numcharts variables
        for xpart in xgen(xvars, group):
            first = True  # True only while processing the first x variable of this chart
            cmd = []  # statements of the graph command, in submission order
            numcharts = len(xpart)
            mostvariables = " ".join(
                set(xpart +
                    list(aesthetics)))  # eliminate duplicates (except with y)
            if spssweight:
                options = ", weight(%s)" % spssweight
            else:
                options = ""
            cmd.append(ggraphtemplate % {
                "allvars": y + " " + mostvariables,
                "options": options
            })
            indentx = indent
            if npage == 0:  # page specs were given
                if numcharts < group:  # short row
                    # shrink the page width in proportion to the number of
                    # charts actually present and rescale the indent to match
                    shortpagex = pagex * indent / 100. + pagex * (
                        100. - indent) / 100. * (float(numcharts) / group)
                    indentx = indent * (pagex / shortpagex)
                    cmd.append(pagestarttemplate % {
                        "pagex": shortpagex,
                        "pagey": pagey
                    })
                else:
                    cmd.append(pagestarttemplate % {
                        "pagex": pagex,
                        "pagey": pagey
                    })
            cmd.append(datatemplate % {"varname": y, "unitcategory": ""})
            # lower-cased names collected here are handed to every gendata call
            # NOTE(review): presumably gendata uses this set to avoid emitting a
            # data statement twice for the same variable -- confirm in gendata
            alldatastatements = set([y.lower()])
            if spssweight:
                cmd.append(gendata(spssweight, vardict, alldatastatements))

            # loop over one or more x variables for this chart
            for currentn, x in enumerate(xpart):
                xobj = vardict[x]
                ml = xobj.VariableLevel
                if numcharts > 1:
                    cmd.append(
                        graphstarttemplate % {
                            "originandscale":
                            scaling(numcharts, currentn, indentx, yscale)
                        })
                if boxplots and ml == "scale":
                    # fixed frame that leaves room for the border boxplots added below
                    cmd.append(
                        graphstarttemplate %
                        {"originandscale": "origin(15%, 10%), scale(75%,75%)"})
                if ml == "scale":  # build scatterplot specs
                    # uc is assigned in both branches but only referenced by the
                    # commented-out datatemplate line further down
                    uc = ""
                    options = ""
                    if size:
                        options = options + ", size(%s)" % size
                        cmd.append(gendata(size, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "size"})
                    if color:
                        options = options + ", color.exterior(%s)" % color
                        cmd.append(gendata(color, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "color.exterior"})
                    if shape:
                        if vardict[shape].VariableLevel == "scale":
                            raise ValueError(
                                _("The shape variable must be categorical: %s")
                                % shape)
                        options = options + ", shape(%s)" % shape
                        cmd.append(gendata(shape, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "shape"})
                else:
                    uc = iscat
                    if categorical == "bars":
                        cmd.append(include0)
                # "other" is only consumed in the `first` branch below, where it
                # is always the empty string
                if not first:
                    other = ", null()"
                else:
                    other = ""
                if title and numcharts == 1 and not boxplots:
                    cmd.append(titletemplate % {"title": title})
                cmd.append(gendata(x, vardict, alldatastatements))
                if label:
                    cmd.append(gendata(label, vardict, alldatastatements))
                #cmd.append(datatemplate % {"varname": x, "unitcategory": uc})
                cmd.append(guidetemplate % {
                    "dim": 1,
                    "varlabel": xobj.VariableLabel or x,
                    "other": ""
                })
                # only the first panel of a chart gets a labelled y axis
                if first:
                    cmd.append(guidetemplate % {
                        "dim": 2,
                        "varlabel": yvarlabel,
                        "other": other
                    })
                else:
                    cmd.append(noyaxis)
                if ml == "scale":
                    if label:
                        # NOTE(review): this literal ends with "))" while the
                        # boxplot branch below uses a single ")" -- confirm
                        # against the scatter element templates that the extra
                        # parenthesis is intended
                        options = options + ", label(%s))" % label
                    if hexbin:
                        cmd.append(hexbinscatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    else:
                        cmd.append(scatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    # one fit-line element per requested fit type
                    for i, fittype in enumerate(
                        [linear, quadratic, cubic, loess]):
                        if fittype:
                            if applyfitto == "group":
                                colorspec = ", color(%s)" % color
                            else:
                                colorspec = ""
                            if numcharts > 1:
                                cmd.append(aesth % {"atype": "color"})
                            cmd.append(fitlineelement % \
                                {"fittype": fittypekwd[i], "y": y, "x": x, "lineshape" : lineshapes[i], "color" : colorspec})
                    if boxplots:  # bordered boxplot if single variable chart
                        # close the scatterplot graph, then add a boxplot of x
                        # along the bottom and a transposed boxplot of y on the right
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(15%, 0%), scale(75%,8%)"
                        })
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append("""COORD: rect(dim(1))""")
                        cmd.append(oneboxplotelement % {"variable": x})
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(92%, 10%), scale(8%, 75%)"
                        })
                        cmd.append("COORD: transpose(rect(dim(1)))")
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append(oneboxplotelement % {"variable": y})
                        cmd.append(graphendtemplate)

                else:
                    # categorical x variable: bar/other element from elementmap, or a boxplot
                    if categorical != "boxplot":
                        cmd.append(categoricalelement % {
                            "etype": elementmap[categorical],
                            "y": y,
                            "x": x
                        })
                    else:
                        if label:
                            options = ", label(%s)" % label
                        else:
                            options = ""
                        cmd.append(boxplotelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                first = False
                if numcharts > 1:
                    cmd.append(graphendtemplate)
            if npage == 0:
                cmd.append(pageendtemplate)
            cmd.append(endgpl)
            spss.Submit(cmd)
def catvalues(items=None, prefix=None, names=None, specialvars=None,
    specialsorder="after", order="d", missing="exclude",
    categorylabels="varlabels", specialvalues=None, other=False,
    variables=None, countvalues=None, macroname=None,
    mincount=0, minpercent=0, maxcount=None, maxpercent=None, separator=" ",
    customattr=False, attrname="ORDER"):
    """Construct macros, custom attributes and MR set definitions.

    The function works in one of two mutually exclusive modes, selected by
    ``variables``.

    ITEMS mode (``variables is None``):
        items -- names of the variables and/or MR sets to process.  Names
            starting with "$" are handled as MR sets, all others as regular
            variables.  All underlying variables must be of the same kind
            (all numeric or all string).
        prefix -- if given, the generated names are ``prefix + "_" + item``.
        names -- explicit macro / MR set names, one per item (expanded with
            ``spssaux._buildvarlist``).  Unless ``customattr`` is set,
            exactly one of ``prefix`` and ``names`` must be supplied.
        specialvalues -- forwarded to ``genVarsCategoryList``; converted to
            floats first when the items are numeric.
        specialvars -- forwarded to ``genSetsCategoryList`` as its
            ``specialvalues`` argument.
        specialsorder, order, missing, categorylabels -- forwarded unchanged
            to the generator functions.
        other -- forwarded to ``genSetsCategoryList``; when true and regular
            variables are present, value-label and missing-value metadata
            are fetched with ``getmetadata`` and forwarded to
            ``genVarsCategoryList``.
        customattr, attrname -- forwarded to ``genVarsCategoryList``;
            ``attrname`` is also shown in the caption of the result table
            when custom attributes were generated.

        For regular variables a pivot table listing the generated macros (or
        custom attributes) is produced, and every generated macro is then
        defined by submitting a DEFINE command.

    COUNTORDERING mode (``variables is not None``):
        ``variables``, ``countvalues``, ``order``, ``macroname``,
        ``mincount``, ``minpercent``, ``maxcount``, ``maxpercent``,
        ``separator`` and ``missing`` are forwarded to ``genVarMacro``
        together with the current weight variable.  ``items`` and ``names``
        must not be given; ``countvalues`` and ``macroname`` are required.

    Raises ValueError when the argument combination is invalid, when numeric
    and string items are mixed, or when a special value for numeric items
    cannot be converted to a number.
    """

    weightvar = spss.GetWeightVar()
    # TODO: allow both names and macroname to be None if customattr
    if variables is None:
        if sum([prefix is None, names is None]) != 1 and not customattr:
            raise ValueError(_("""Either a  prefix or a set of macro or MR set names or a custom attribute must be specified"""))
        if items is None:
            # Fail with a clear message instead of a TypeError further down
            # (len(items) / iterating over items).
            raise ValueError(_("""No items to process were specified"""))
        if names is not None:
            names = spssaux._buildvarlist(names)
            if len(names) != len(items):
                raise ValueError(_("""The number of macro or MR set names specified is different from the number of items"""))
        if prefix is not None:
            names = [prefix + "_" + name for name in items]   # MR sets will have a $ in the macro name

        resolver = Resolver()
        try:
            # check existence and get all variables and variable types
            allvars, vartypes = resolver.resolve(items)
            # variables, direct or in MR sets, must be either all strings or all numeric
            # (a variable type of 0 means numeric)
            isnumeric = [vartypes[item] == 0 for item in allvars]
            numerics = any(isnumeric)
            strings = not all(isnumeric)
            if numerics and strings:
                raise ValueError(_("""Items to process must be either all numeric or all string"""))
            if numerics and specialvalues is not None:
                try:
                    specialvalues = [float(item) for item in specialvalues]
                except (TypeError, ValueError):
                    raise ValueError(_("""A nonnumeric special value was given for numeric variables"""))
        finally:
            # Always release the resolver, including when validation fails.
            # NOTE(review): the resolver object is still handed to
            # genSetsCategoryList below after close(), exactly as before;
            # presumably close() only releases a transient resource - confirm.
            resolver.close()

        regularvars = [item for item in items if not item.startswith("$")]
        mrsets = [item for item in items if item.startswith("$")]
        if regularvars and other:
            valuelabelsdict, missingvaluesdict = getmetadata(regularvars, missing)
        else:
            valuelabelsdict = None
            missingvaluesdict = None

        macrosgenerated = []
        if regularvars:
            macrosgenerated, customattrsgenerated = genVarsCategoryList(regularvars,
                specialvalues=specialvalues, macroname=names,
                missing=missing, order=order, weightvar=weightvar, specialsorder=specialsorder,
                valuelabelsdict=valuelabelsdict, missingvaluesdict=missingvaluesdict,
                customattr=customattr, attrname=attrname)
            spss.StartProcedure("STATS CATEGORY ORDER", "STATSCATEGORYMACRO")
            try:
                if customattrsgenerated:
                    caption = _("Custom attribute name: %s") % attrname
                else:
                    caption = ""
                table = spss.BasePivotTable("Generated Macros or Custom Attributes",
                    "STATSCATMACROS", caption=caption)
                if macrosgenerated:
                    gen = macrosgenerated
                else:
                    gen = customattrsgenerated
                # The comprehension variables deliberately avoid the name
                # "names": list-comprehension variables leak into the
                # enclosing scope under Python 2 and would clobber the
                # names list that is passed to genSetsCategoryList below.
                table.SimplePivotTable(rowdim=_("Name"),
                    rowlabels=[genname for genname, definition in gen],
                    collabels=[_("Definition")],
                    cells=[definition for genname, definition in gen])
            finally:
                # can't issue MRSETS command in a procedure state, so the
                # procedure must be closed even if building the table fails
                spss.EndProcedure()
        # specialvars do not have to have been in the original set.
        if mrsets:
            # The return value is not needed here:
            # mrset generation displayed via MRSET creation command so not repeated here
            genSetsCategoryList(mrsets, allvars, vartypes,
                resolver, specialvalues=specialvars, macroname=names,
                missing=missing, order=order, weightvar=weightvar,
                categorylabels=categorylabels, specialsorder=specialsorder, other=other)

        for genname, definition in macrosgenerated:
            ###spss.SetMacroValue(...)    # This api mishandles Unicode characters so use syntax instead
            spss.Submit("""DEFINE %s() %s""" % (genname, definition))

    else:
        if items is not None or names is not None:
            raise ValueError(_("""ITEMS and NAMES cannot be used with COUNTORDERING specifications."""))
        if countvalues is None or macroname is None:
            raise ValueError(_("""The values to count and a macroname must be specified when using COUNTORDERING specifications."""))
        genVarMacro(variables, countvalues, order, macroname, mincount,
            minpercent, maxcount, maxpercent, separator, weightvar, missing)