Beispiel #1
0
def mod_database(input_scale, prefix, start, stop):
    """Expand the scale table per time step, then recode and rename in SPSS.

    For every ``i`` in ``range(start, stop)`` a copy of ``input_scale`` (with
    its index reset) is made, its ``time`` column is set to
    ``prefix + str(i)`` and every column listed in ``gb.columns_to_use`` is
    prefixed with the same tag.  The copies are concatenated, rows whose
    ``items`` value exists in the active SPSS file are recoded (via
    ``recode``) and, where a ``rename`` value is present, renamed.

    Returns:
        tuple: ``(db, vars_not_in_file)`` where ``db`` holds only the rows
        whose ``items`` value is in the SPSS file after renaming, and
        ``vars_not_in_file`` is a newline separated report of the others.

    Raises:
        ValueError: from ``pd.concat`` when ``range(start, stop)`` is empty.
    """
    vars_in_file = spssaux.VariableDict().Variables
    frames = []
    for i in range(start, stop):
        tag = prefix + str(i)
        df = input_scale.reset_index()
        df['time'] = tag
        for col in gb.columns_to_use:
            df[col] = tag + df[col]
        frames.append(df)
    db = pd.concat(frames, ignore_index=True)

    db2 = db[db['items'].isin(vars_in_file)]
    spss.Submit(recode(db2))

    # Build the mask from db2 itself instead of relying on index alignment
    # with the larger, unfiltered frame.
    rename_db = db2.loc[db2['rename'].notnull(), ['items', 'rename']]
    for _idx, row in rename_db.iterrows():
        spss.Submit('RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'.format(
            orgname=row['items'], new_name=row['rename']))

    # Re-read the dictionary: the renames above changed the file's names.
    # NOTE(review): db['items'] still holds the pre-rename names, so renamed
    # variables are reported as missing and dropped here - confirm intended.
    vars_in_file = spssaux.VariableDict().Variables
    in_file = db['items'].isin(vars_in_file)
    diff_vars = db['items'][~in_file].values.tolist()
    # str() keeps the report from failing on non-string entries such as NaN.
    vars_not_in_file = ('Variables not in file\n' +
                        '\n'.join(str(name) for name in diff_vars) + '\n')
    print(
        str(len(diff_vars)) +
        ' variables not in file. Check log.txt for specification\n')
    db = db[in_file]
    return db, vars_not_in_file
Beispiel #2
0
def expandvarnames(varnames):
    """Return varnames with ALL and TO expansion.

    ``varnames`` may use the construct ``v1, v2, ... ALL`` to coerce the
    order of the leading variables; ``ALL`` must then be the last item.
    ``TO`` ranges are expanded through ``spssaux.VariableDict().expand``.
    When neither keyword is present the input is returned unchanged and the
    SPSS dictionary is not consulted.

    Variables contributed by ``ALL`` are appended in dictionary order,
    skipping any name already listed.  The comparison ignores case.

    Raises:
        ValueError: if ``ALL`` appears anywhere but the last position.
    """
    vardict = None
    lowered = [item.lower() for item in varnames]
    allvars = []

    # check for and process ALL name
    if 'all' in lowered:
        if lowered.index('all') != len(varnames) - 1:
            raise ValueError(
                _("""ALL must be the last item in the variable list"""))
        vardict = spssaux.VariableDict()
        allvars = vardict.expand("ALL")
        varnames = list(varnames[:-1])

    # process TO
    if 'to' in lowered:
        if vardict is None:
            vardict = spssaux.VariableDict()
        varnames = vardict.expand(varnames)

    # append ALL result if it was specified
    # a set union would lose the order, so track seen names separately
    seen = {name.lower() for name in varnames}
    for item in allvars:
        key = item.lower()
        if key not in seen:
            seen.add(key)
            varnames.append(item)
    return varnames
Beispiel #3
0
def main():
    """Run the extraction pipeline and write the log and syntax files.

    Steps, in order: write the log header, open ``gb.org_data`` in SPSS,
    read the item table from ``gb.input_csv``, keep the rows that have a
    value in at least one of the ``gb.version``, ``gb.single_items``,
    ``rename`` or ``recode`` columns, expand them with ``mod_database``,
    save the syntax returned by ``prepare_data``, run ``how_often`` and
    ``reverse_items.main``, append the ``extract_vars`` syntax, and finally
    append the "variables not in file" report to the log.

    Relies on the module-level names ``log``, ``syntax_save`` and
    ``start_time``.
    """
    gb.to_log += "data in = {data_in}\ndata out = {data_out}\n\n".format(
        data_in=gb.org_data, data_out=gb.select_rev)
    with open(log, 'w') as out:
        out.write(gb.to_log)
    spssaux.OpenDataFile(gb.org_data)
    spss.SetOutput("off")

    # DataFrame.from_csv was removed from pandas; this is its documented
    # replacement (first column as index, dates parsed).
    db = pd.read_csv(gb.input_csv, index_col=0, parse_dates=True)
    selected = db.loc[db[gb.version].notnull()
                      | db[gb.single_items].notnull()
                      | db['rename'].notnull()
                      | db['recode'].notnull()]
    var_table, to_log2 = mod_database(selected, gb.prefix, gb.start, gb.stop)

    cmd = prepare_data(var_table)
    with open(syntax_save, 'w') as out:
        out.write(cmd)

    list_of_all_vars = var_table['items'].values.tolist()
    vars_in_file = spssaux.VariableDict().Variables
    to_log1 = str(len(list_of_all_vars)) + ' variables extracted.\n'
    print(to_log1)
    with open(log, 'a') as out:
        out.write(to_log1)

    how_often(var_table, vars_in_file, gb.prefix, gb.start, gb.stop)
    reverse_items.main(var_table, log, syntax_save)

    cmd = extract_vars(list_of_all_vars, gb.urvalsinfo, gb.select_rev,
                       gb.keep_vars)
    # The extraction syntax is only saved here, not submitted to SPSS.
    with open(syntax_save, 'a') as out:
        out.write(cmd + '\n')
    with open(log, 'a') as out:
        out.write(to_log2)
    print("--- %s seconds ---" % (time.time() - start_time))
Beispiel #4
0
def dicotomize(db):
    """Build RECODE syntax that dichotomizes every item in ``db['items']``.

    For each entry of the module-level ``columns_to_use`` a cut-modifier
    column and a name suffix are selected (``cut_mod`` / ``'_middle'`` for
    ``version``, ``single_items_cut_mod`` / ``'_single'`` for
    ``single_items``).  For each item the labelled values strictly between
    0 and 77 are read from the SPSS dictionary and the scale is split in the
    middle: values up to the limit become 0, values above it become 1.  The
    item's cut modifier, when not null, shifts the limit.

    Returns:
        str: the generated syntax, terminated by ``EXECUTE.``.  It is also
        printed.

    Raises:
        ValueError: if an item has no value label in the 1..76 range
            (``min`` of an empty list).
    """
    sdict = spssaux.VariableDict()
    parts = []
    for typ in columns_to_use:
        if typ == version:
            mod_cut = cut_mod
            suffix = '_middle'
        if typ == single_items:
            mod_cut = single_items_cut_mod
            suffix = '_single'
        for var in db['items']:
            mod_val = db.loc[db['items'] == var, mod_cut]
            mod = 0
            if pd.notnull(mod_val.iloc[0]):
                mod = int(mod_val.iloc[0])
            key_list = [
                int(key) for key in sdict[var].ValueLabels
                if 0 < int(key) < 77
            ]
            min_val, max_val = min(key_list), max(key_list)
            # Parenthesised on purpose: "%" binds tighter than "-", so the
            # unparenthesised form tested max_val - (min_val % 2) instead.
            # Floor division keeps the cut points integral on Python 3.
            if (max_val - min_val) % 2 == 1:  # even number of categories
                limit = max_val // 2
            else:                             # odd number of categories
                limit = (max_val + 1) // 2
            if limit:
                limit = limit + mod
                parts.append(
                    'RECODE {old_var} (1 thru {low_lim}=0) ({high_lim} thru 76=1) INTO {new_var}.\n'
                    .format(low_lim=limit,
                            high_lim=limit + 1,
                            old_var=var,
                            new_var=var + suffix))
                if 'x6_2occdev1' == var:
                    print(var)
    parts.append('EXECUTE.\n')
    cmd = ''.join(parts)
    print(cmd)
    return cmd
Beispiel #5
0
def reverse_items(db, log, syntax_save):
    """Collect the reversal syntax for every item flagged ``reverse == 1``.

    Each flagged item name is passed to ``get_reverse`` together with the
    SPSS variable dictionary and ``log``.  ``syntax_save`` is accepted but
    not used here.

    Returns:
        tuple: ``(syntax, count)`` - the concatenated syntax and the number
        of reversed items.
    """
    flagged_rows = db.loc[db['reverse'] == 1, ['items']].values.tolist()
    sdict = spssaux.VariableDict()
    pieces = [get_reverse(entry[0], sdict, log) for entry in flagged_rows]
    return ''.join(pieces), len(pieces)
Beispiel #6
0
def display(variables, withvars, stats, matnames, clevel, missing, inclusion):
    """Display pivot table output for regular or split files.

    variables is the main variable list
    withvars is None or a list of variables to correlate with
    stats is the result structure; each element supplies ``splitvars``,
    ``variable``, ``ns``, ``corrs`` and ``cis``
    matnames is accepted but not used here
    clevel is the confidence level
    missing is listwise or pairwise
    inclusion is include or exclude for user missing values

    The procedure block is always closed, even when building the table
    fails, so a failure does not leave SPSS inside an open procedure.
    """

    spss.StartProcedure(_("Correlations"), "CICORRELATIONS")
    try:
        tbl = spss.BasePivotTable(_("Correlations"), "CICORRELATIONS")
        tbl.Caption(
            _("""Missing value handling: %s, %s.  C.I. Level: %s""") %
            (missing, inclusion, clevel))
        rowsplits = [
            tbl.Append(spss.Dimension.Place.row, v)
            for v in spss.GetSplitVariableNames()
        ]
        nsplitvars = len(rowsplits)
        tbl.Append(spss.Dimension.Place.row, _("Variable"))
        tbl.Append(spss.Dimension.Place.row, _("Variable2"))
        vlist = withvars if withvars else variables
        col1 = tbl.Append(spss.Dimension.Place.column, _("Statistic"))
        tbl.SetCategories(col1, [
            CellText.String(_("Correlation")),
            CellText.String(_("Count")),
            CellText.String(_("Lower C.I.")),
            CellText.String(_("Upper C.I.")),
            CellText.String(_("Notes"))
        ])

        # With a WITH list the statistics for the with-variables are read
        # from positions after the main variables.
        # NOTE(review): vlist treats an empty withvars like None, while this
        # offset only checks for None - confirm callers never pass [].
        offset = len(variables) if withvars is not None else 0

        vardict = spssaux.VariableDict()
        for s in stats:
            for i, vv in enumerate(vlist):
                j = i + offset
                if nsplitvars > 0:
                    rows = copy.copy(s.splitvars)
                else:
                    rows = []
                rows.append(
                    CellText.VarName(
                        vardict[s.variable.toString().rstrip()].VariableIndex))
                rows.append(CellText.VarName(vardict[vv].VariableIndex))
                count = s.ns[j].toNumber()
                if count > 10:
                    note = ""
                elif count <= 3:
                    note = _("Some items not computed")
                else:
                    note = _("Normality assumption is not accurate")
                statsi = [
                    s.corrs[j], s.ns[j], s.cis[j][0], s.cis[j][1],
                    CellText.String(note)
                ]
                tbl.SetCellsByRow(rows, statsi)
    finally:
        spss.EndProcedure()
Beispiel #7
0
def impute_item_for_item():
    """Build multiple-imputation syntax, one item at a time, per gender.

    For every value label of the ``gb.gender`` variable a dataset copy
    restricted to that value is created.  Then, for each non-null entry of
    ``db_glob['items']``, the prefixed variants
    ``gb.prefix + str(i) + item`` for ``i`` in ``range(gb.start, gb.stop)``
    that exist in the SPSS dictionary are imputed together and saved to
    ``<item>_<gender>.sav``.  The MIN/MAX constraints come from the value
    labels that are not listed in ``gb.missing``.

    Items whose prefixed names cannot be built are reported with ``print``
    and skipped.

    Returns:
        str: the generated SPSS syntax (nothing is submitted here).
    """
    sdict = spssaux.VariableDict()
    parts = []
    for value, gender in sdict[gb.gender].ValueLabels.items():
        parts.append("""DATASET ACTIVATE orginal.
                    DATASET COPY  {gender}.
                    DATASET ACTIVATE  {gender}.
                    FILTER OFF.
                    USE ALL.
                    SELECT IF ({gender_var} = {val}).
                    EXECUTE.\n""".format(val=value,
                                         gender=gender,
                                         gender_var=gb.gender))
        for var in db_glob['items'].loc[db_glob['items'].notnull()]:
            try:
                candidates = [
                    gb.prefix + str(i) + var for i in range(gb.start, gb.stop)
                ]
                var_list = [name for name in candidates if name in sdict]
            except Exception:
                # Best effort: report the offending entry and move on rather
                # than continuing with a stale or undefined var_list.
                print(var)
                print(type(var))
                print(var)
                continue
            if not var_list:
                continue

            dataset = var + '_' + gender
            parts.append('DATASET DECLARE {var}.\n'.format(var=dataset))
            parts.append('MULTIPLE IMPUTATION {all_vars} \n'.format(
                all_vars=' '.join(var_list)))
            parts.append(
                '  /IMPUTE METHOD=FCS MAXITER= 10 NIMPUTATIONS=25 SCALEMODEL=LINEAR INTERACTIONS=NONE \n  SINGULAR=1E-012 MAXPCTMISSING=NONE \n'
            )
            for prefixed_var in var_list:
                vals = [
                    val for val in sdict[prefixed_var].ValueLabels
                    if val not in gb.missing
                ]
                # NOTE(review): the comparison with 1 and 10 assumes the
                # label keys are numeric - confirm against spssaux.
                low, high = min(vals), max(vals)
                if low < 1 or high > 10:
                    print('warning: \nmin = {min} \nmax = {max}'.format(
                        min=low, max=high))
                parts.append(
                    ' /CONSTRAINTS {prefixed_var}( MIN={min} MAX={max} RND=1)\n'
                    .format(prefixed_var=prefixed_var, min=low, max=high))
            parts.append('  /MISSINGSUMMARIES NONE\n')
            parts.append('  /IMPUTATIONSUMMARIES MODELS\n')
            parts.append('  /OUTFILE IMPUTATIONS={var} .\n'.format(var=dataset))
            parts.append('DATASET ACTIVATE {var}.\n'.format(var=dataset))
            parts.append('SAVE OUTFILE = "{var}.sav"\n'.format(var=dataset))
            parts.append('  /KEEP Imputation_ {ID} {gender_var} {vars}\n'.format(
                vars=' '.join(var_list), gender_var=gb.gender, ID=gb.id))
            parts.append('  /COMPRESSED.\n')
            parts.append('DATASET ACTIVATE {gender}.\n'.format(gender=gender))
    return ''.join(parts)
Beispiel #8
0
def dolabels(variables=None,
             varpattern=None,
             lblvars=None,
             lblpattern=None,
             execute=True,
             varsperpass=20,
             syntax=None):
    """Run the STATS VALLBLS FROMDATA procedure.

    The variables to label and the string variables that carry the labels
    are resolved from the explicit lists and/or patterns.  The label values
    are aggregated in passes of ``varsperpass`` variables, the resulting
    VALUE LABELS syntax is submitted when ``execute`` is true, reported, and
    written to ``syntax`` when a file specification is given.

    Raises:
        ValueError: if the case-insensitive variable dictionary cannot be
            built, nothing was resolved, the list lengths are incompatible,
            a label variable is numeric, or the active dataset is unnamed.
    """
    try:
        vardict = spssaux.VariableDict(caseless=True)
    except:
        raise ValueError(_("""This command requires a  newer version the spssaux module.  \n
It can be obtained from the SPSS Community website (www.ibm.com/developerworks/spssdevcentral)"""))

    targets = resolve(vardict, _("variables to label"), variables,
                      varpattern, stringonly=False)
    sources = resolve(vardict, _("label variables"), lblvars, lblpattern,
                      stringonly=True)

    if not (len(targets) != 0 and len(sources) != 0):
        raise ValueError(_("""No variables to label or no labelling variables were specified.
If a pattern was used, it may not have matched any variables."""))
    if len(sources) > 1 and len(sources) != len(targets):
        raise ValueError(_("The number of label variables is different from the number of variables to label"))
    # a variable type of 0 denotes a numeric variable
    if min(vardict[name].VariableType for name in sources) == 0:
        raise ValueError(_("""The label variables must all have type string"""))

    active_name = spss.ActiveDataset()
    if active_name == "*":
        raise ValueError(_("""The active dataset must have a dataset name in order to use this procedure"""))

    if syntax:
        syntax = FileHandles().resolve(syntax.replace("\\", "/"))

    builder = Mkvls(targets, sources, varsperpass, execute, syntax, vardict)
    reactivate = """DATASET ACTIVATE %s""" % active_name

    # re-activate the original dataset before every aggregation pass
    for offset in range(0, len(targets), varsperpass):
        spss.Submit(reactivate)
        builder.doaggr(offset)
    spss.Submit(reactivate)

    generated = builder.dolabels()
    if generated and execute:
        spss.Submit(generated)
    builder.report(generated)
    if generated and syntax:
        writesyntax(generated, syntax, builder)
Beispiel #9
0
def reverse_items():
    """Return the reversal syntax for the items flagged in the scale table.

    Each row of the module-level ``df`` may hold, in its third column, a
    comma separated list of 1-based positions.  The positions index the
    item columns (sixth column onwards); every item addressed that way is
    handed to ``get_reverse``.
    """
    sdict = spssaux.VariableDict()
    pieces = []
    for record in df.values.tolist():
        spec = record[2]
        if not pd.notnull(spec):
            continue
        positions = [int(token) for token in spec.split(",")]
        item_columns = record[5:]
        selected = [item_columns[pos - 1] for pos in positions]
        for name in selected:
            pieces.append(get_reverse(name, sdict))
    return ''.join(pieces)
Beispiel #10
0
def mod_database(input_scale, prefix, start, stop):
    """Replicate the table once per step and keep rows present in SPSS.

    For each ``i`` in ``range(start, stop)`` a copy of ``input_scale`` with
    its index reset gets ``time`` set to ``prefix + str(i)`` and the same
    tag prepended to its ``indep`` column.  The copies are concatenated and
    only the rows whose ``indep`` value is a variable of the active SPSS
    file are returned.
    """
    vars_in_file = spssaux.VariableDict().Variables

    def tagged_copy(step):
        # one copy of the input table, stamped with this step's tag
        tag = prefix + str(step)
        frame = input_scale.reset_index()
        frame['time'] = tag
        frame["indep"] = tag + frame["indep"]
        return frame

    per_step = {step: tagged_copy(step) for step in range(start, stop)}
    combined = pd.concat(per_step, ignore_index=True)
    present = combined['indep'].isin(vars_in_file)
    return combined[present]
def Run(args):
    """Entry point of the SPSSINC ANON extension command.

    ``args`` is the parsed command keyed by command name; only the first
    entry is used.  A HELP subcommand calls ``helper`` and nothing else,
    otherwise the syntax is validated and ``anon`` is run via ``processcmd``.
    """

    args = args[list(args.keys())[0]]

    def options(keyword, ktype, var, **extra):
        # keyword of the OPTIONS subcommand
        return Template(keyword, subc="OPTIONS", ktype=ktype, var=var, **extra)

    def save(keyword, ktype, var):
        # keyword of the SAVE subcommand
        return Template(keyword, subc="SAVE", ktype=ktype, var=var)

    templates = [
        Template("VARIABLES", subc="", ktype="existingvarlist",
                 var="varnames", islist=True),
        options("SVALUEROOT", "literal", "svalueroot"),
        options("NAMEROOT", "varname", "nameroot"),
        options("METHOD", "str", "method",
                vallist=['random', 'sequential', 'transform']),
        options("SEED", "float", "seed"),
        options("OFFSET", "float", "offset"),
        options("SCALE", "float", "scale"),
        options("MAXRVALUE", "int", "maxrvalue", islist=True),
        options("ONETOONE", "existingvarlist", "onetoone", islist=True),
        options("MAPPING", "literal", "mapping"),
        save("NAMEMAPPING", "literal", "namemapping"),
        save("VALUEMAPPING", "literal", "valuemapping"),
        save("IGNORETHIS", "bool", "ignorethis"),
        Template("HELP", subc="", ktype="bool"),
    ]
    oobj = Syntax(templates)

    # A HELP subcommand overrides all else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, anon, vardict=spssaux.VariableDict())
Beispiel #12
0
def add_suffix(folder):
    """Suffix every variable of each .sav file with the file's base name.

    The ``no_strings`` and ``strings`` sub-folders of ``folder`` are
    processed.  Each file is opened, all variables except the id variable
    are renamed to ``<name>_<file base name>`` and the result is saved under
    the same file name in the sub-folder's ``suffix`` directory.
    """
    exclude = ['kod_id']  # name of the id variable; it is never renamed
    for item in ["/no_strings", "/strings"]:
        datafiles = get_filelist(folder + item, 'sav')
        for path in datafiles:
            print(path)
            spssaux.OpenDataFile(path)
            # splitext removes only the extension.  str.strip('.sav') would
            # strip any of the characters ".sav" from both ends, turning
            # "data.sav" into "dat".
            basename = os.path.splitext(os.path.basename(path))[0]
            suffix = basename  # desired suffix
            print(basename)
            names = [
                name for name in spssaux.VariableDict().variables
                if name not in exclude
            ]
            oldnames = spssaux.VariableDict().expand(names)
            newnames = [varnam + "_" + suffix for varnam in oldnames]
            spss.Submit('rename variables (%s=%s).' %
                        ('\n'.join(oldnames), '\n'.join(newnames)))
            spss.Submit("""
            SAVE OUTFILE = "%s%s".
            DATASET CLOSE ALL.
            NEW FILE.
            """ % (folder + item + '/suffix/', basename + '.sav'))
Beispiel #13
0
def fix_files(folder, datevars, id_variable, date):
    """Normalise every .sav file in ``folder`` and save it again.

    For each file: variables are renamed via ``rename_vars``, the id
    variable is made numeric, ``time_days`` is computed from ``AnswerDate``
    (when present) relative to ``date``, and every variable that is not a
    string variable, not listed in ``datevars`` and has no ``mean`` in its
    name is converted to a right-aligned numeric scale variable.  For all of
    those except the id variable, system-missing is recoded to 999 and 999
    is declared missing.  Cases are sorted by the id variable,
    ``data_from_week`` is set to the file's base name and the file is saved
    via ``save_and_close``.

    Args:
        folder: directory searched for ``.sav`` files.
        datevars: names of date variables that must keep their type.
        id_variable: name of the case identifier variable.
        date: text placed inside ``DATE.MDY(...)``.
    """
    datafiles = get_filelist(folder, "sav")
    for fil in datafiles:
        spssaux.OpenDataFile(fil)
        rename_vars(spssaux.VariableDict().variables)
        spss.Submit("ALTER TYPE %s (f8)." % id_variable)
        stringvars = get_stringvars()
        print(stringvars)
        # Re-read the dictionary: names may have changed above.
        vars_in_file = spssaux.VariableDict().variables
        # splitext copes with file names that hold several dots or none,
        # where unpacking basename.split('.') raised ValueError.
        base = os.path.splitext(os.path.basename(fil))[0]
        exclude = [var for var in stringvars if var in vars_in_file]

        has_time_days = "time_days" in vars_in_file
        if "AnswerDate" in vars_in_file:
            spss.Submit("""
            COMPUTE time_days = DATEDIFF(AnswerDate, DATE.MDY(%s), "day").
            EXECUTE.
            """ % date)
            has_time_days = True

        untouched = set(exclude).union(datevars)
        for var in vars_in_file:
            if 'mean' not in var and var not in untouched:
                spss.Submit("ALTER TYPE %s (f8)." % var)
                spss.Submit("VARIABLE ALIGNMENT %s(right)." % var)
                spss.Submit("VARIABLE LEVEL %s(scale)." % var)
                if var != id_variable:
                    spss.Submit("RECODE %s (SYSMIS=999)." % var)
                    spss.Submit("MISSING VALUES %s (999)." % var)
        spss.Submit("SORT CASES BY %s (A)." % id_variable)

        closing = [
            "COMPUTE data_from_week=%s." % base,
            "EXECUTE.",
            "ALTER TYPE data_from_week (f8).",
        ]
        # time_days only exists when AnswerDate was present (or the file
        # already had it); altering a missing variable is an SPSS error.
        if has_time_days:
            closing.append("ALTER TYPE time_days (f8).")
        spss.Submit(closing)
        spss.Submit(save_and_close(base, exclude, folder))
    spss.Submit("NEW FILE.")
Beispiel #14
0
def compute_means():
    """Build COMPUTE syntax for the mean score of every scale in ``df``.

    Row layout of the module-level ``df`` as used here: column 0 is the
    scale name, column 1 the minimum number of valid items for ``mean.n``,
    column 2 an optional comma separated list of 1-based positions of
    reversed items, column 4 the variable label, and columns 5 onwards the
    item names.

    Only items that are non-null and exist in the SPSS dictionary are used,
    and a mean is computed only when more than one remains.  Reversed items
    are referenced by their ``_rev`` name.  The reverse positions refer to
    the original item columns, so they are resolved before missing items are
    dropped.

    Returns:
        str: the generated SPSS syntax.
    """
    sdict = spssaux.VariableDict()
    parts = []

    for row in df.values.tolist():
        # Keep each item's original 1-based position so the reverse list
        # still points at the right item after filtering.
        present = [(position, name)
                   for position, name in enumerate(row[5:], 1)
                   if pd.notnull(name) and name in sdict]
        print([name for _position, name in present])
        if len(present) > 1:
            rev = set()
            if pd.notnull(row[2]):
                rev = {int(x) for x in row[2].split(",")}
            items = [
                name + '_rev' if position in rev else name
                for position, name in present
            ]
            parts.append('COMPUTE {var}_mean=mean.{min_mean}({items}).\n'.format(
                var=row[0], min_mean=row[1], items=','.join(items)))
            parts.append('Execute.\n')
            parts.append("VARIABLE LABELS {var}_mean '{lable}'.\n".format(
                var=row[0], lable=row[4]))
    return ''.join(parts)
Beispiel #15
0
def Run(args):
    """Entry point of the PROPOR extension command.

    ``args`` is keyed by command name; the first entry holds the
    subcommands that were given.  HELP prints the help text and nothing
    else.  Otherwise the command is parsed and ``dopropor`` is called with
    the parsed parameters.  Failures are printed rather than raised unless
    the local ``verbose`` switch is turned on.
    """

    verbose = False  # flip to see the raw arguments and full tracebacks
    if verbose:
        print(args)
    args = args[list(args.keys())[0]]
    if verbose:
        print(args)

    # define the syntax
    templates = [
        Template("NUM", subc="", ktype="str", islist=True),
        Template("DENOM", subc="", ktype="str", islist=True),
        Template("ID", subc="", ktype="existingvarlist", islist=False),
        Template("HELP", subc="", ktype="bool"),
        Template("NAME", subc="DATASET", var="dsname", ktype="varname"),
        Template("ALPHA", subc="LEVEL", ktype="float",
                 vallist=(.0000000001, .99999999999)),
    ]
    oobj = Syntax(templates)

    # A HELP subcommand overrides all else
    if "HELP" in args:
        print(helptext)
        return

    try:
        # parse and execute the command
        oobj.parsecmd(args, vardict=spssaux.VariableDict())
        dopropor(**oobj.parsedparams)
    except BaseException as failure:
        # As an extension command the Python traceback is suppressed and
        # only the message is shown, unless verbose mode is on.
        if verbose:
            raise
        print(failure)
Beispiel #16
0
def how_often(var_list, vars_in_file, prefix, start, stop):
    """Report variables whose value labels differ between time steps.

    For every name in ``var_list`` the value labels of
    ``prefix + str(x) + name`` are collected for each ``x`` in
    ``range(start, stop)`` that exists in ``vars_in_file``, ignoring the
    codes '77', '88' and '99'.  Names for which ``checkEqual1`` rejects the
    collected label dictionaries are printed.  The total is printed and
    appended to the module-level ``log`` file.
    """
    sdict = spssaux.VariableDict()
    ignored_codes = ('77', '88', '99')
    mismatches = 0
    for base_name in var_list:
        collected = []
        for step in range(start, stop):
            full_name = prefix + str(step) + base_name
            if full_name not in vars_in_file:
                continue
            labels = sdict[full_name].ValueLabels
            for code in ignored_codes:
                if code in labels:
                    del labels[code]
            collected.append(labels)

        if not checkEqual1(collected):
            mismatches += 1
            print(base_name)

    to_log = (str(mismatches) +
              ' variables have different number of response categories.\n')
    print(to_log)
    with open(log, 'a') as out:
        out.write(to_log)
def plots(yvars,
          xvars,
          color=None,
          size=None,
          shape=None,
          label=None,
          linear=False,
          quadratic=False,
          cubic=False,
          loess=False,
          ignore=False,
          title="",
          categorical="bars",
          group=1,
          boxplots=False,
          hexbin=False,
          applyfitto="total",
          indent=15,
          yscale=75,
          pagex=None,
          pagey=None):
    """Create plots per the specification described in the help text.

    First a "Chart Legend Information" pivot table listing the color, size,
    shape and label variables and the requested fit lines is produced.
    Then, for every y variable and every batch of x variables yielded by
    ``xgen(xvars, group)``, a list of graph statements is assembled from the
    module-level templates and submitted with ``spss.Submit``.

    Parameters, as used by the code below:
        yvars: names of the y variables; each must have measurement level
            "scale".
        xvars: names of the x variables, batched by ``xgen``.
        color, size, shape, label: optional variable names used as
            aesthetics for scale x variables; ``label`` is also used for
            categorical boxplots.
        linear, quadratic, cubic, loess: truthy to add that fit line to
            scatterplots.
        ignore: not referenced in this function.
        title: appended to the legend table title, and used as chart title
            for single-chart output without boxplots.
        categorical: element type for non-scale x variables; "bars" also
            adds ``include0`` and "boxplot" selects the boxplot element.
        group: batch size passed to ``xgen``; values above 1 turn
            ``boxplots`` off.
        boxplots: add border boxplots around scatterplots.
        hexbin: use the hexbin scatter element instead of points.
        applyfitto: "group" colors the fit lines by ``color``; forced to
            "total" when there is no color variable or it is scale level.
        indent, yscale: passed to ``scaling`` for multi-chart layouts.
        pagex, pagey: page size; both or neither must be given.

    Raises:
        ValueError: if only one of pagex/pagey is given, a y variable is
            not scale level, or the shape variable is scale level.
    """

    npage = [pagex, pagey].count(None)  # 0 means both specified
    if npage == 1:
        raise ValueError(
            _("Page specification must include both x and y sizes"))
    if group > 1:
        boxplots = False
    # normalise any falsy weight value to None so it is dropped from the
    # aesthetics set built below
    spssweight = spss.GetWeightVar()
    if not spssweight:
        spssweight = None

    vardict = spssaux.VariableDict()
    # display pivot table of legend information
    fits = []
    for i, fittype in enumerate([linear, quadratic, cubic, loess]):
        if fittype:
            fits.append(fittypetable[i])

    # NOTE(review): EndProcedure is not in a finally block, so an exception
    # raised while building the table skips it - confirm acceptable.
    spss.StartProcedure("STATS REGRESS", _("Relationship Plots"))
    ttitle = _("Chart Legend Information")
    if title:
        ttitle = ttitle + "\n" + title
    tbl = spss.BasePivotTable(
        ttitle,
        "CHARTLEGENDINFO",
        caption=
        _("Legend Settings for the charts that follow.  Some settings do not apply to categorical charts."
          ))
    tbl.SimplePivotTable(_("Settings"),
                         rowlabels=[
                             _("Color by"),
                             _("Size by"),
                             _("Shape by"),
                             _("Label by"),
                             _("Fit Lines")
                         ],
                         collabels=[_("Value")],
                         cells=[
                             labelit(color, vardict) or "---",
                             labelit(size, vardict) or "---",
                             labelit(shape, vardict) or "---",
                             labelit(label, vardict) or "---", "\n".join(fits)
                             or "---"
                         ])
    spss.EndProcedure()

    # group fitlines only available for categorically defined groups
    if not color or (color and vardict[color].VariableLevel == "scale"):
        applyfitto = "total"

    # every variable besides x and y that the chart data must include
    aesthetics = set([
        item for item in [color, size, shape, label, spssweight]
        if not item is None
    ])

    for y in yvars:
        yobj = vardict[y]
        if yobj.VariableLevel != "scale":
            raise ValueError(
                _("Y variables must have a scale measurement level: %s") % y)
        yvarlabel = yobj.VariableLabel or y

        # construct one possibly multi-part chart for each numcharts variables
        for xpart in xgen(xvars, group):
            first = True  # True only for the first x variable of this chart
            cmd = []
            numcharts = len(xpart)
            # the variables pass through a set, so their order in the
            # joined string is whatever order the set iterates in
            mostvariables = " ".join(
                set(xpart +
                    list(aesthetics)))  # eliminate duplicates (except with y)
            if spssweight:
                options = ", weight(%s)" % spssweight
            else:
                options = ""
            cmd.append(ggraphtemplate % {
                "allvars": y + " " + mostvariables,
                "options": options
            })
            indentx = indent
            if npage == 0:  # page specs were given
                if numcharts < group:  # short row
                    # shrink the page width in proportion to the number of
                    # charts in this row and rescale the indent to match
                    shortpagex = pagex * indent / 100. + pagex * (
                        100. - indent) / 100. * (float(numcharts) / group)
                    indentx = indent * (pagex / shortpagex)
                    cmd.append(pagestarttemplate % {
                        "pagex": shortpagex,
                        "pagey": pagey
                    })
                else:
                    cmd.append(pagestarttemplate % {
                        "pagex": pagex,
                        "pagey": pagey
                    })
            cmd.append(datatemplate % {"varname": y, "unitcategory": ""})
            # names passed to gendata along with each variable; seeded with
            # y, whose data statement was just emitted
            alldatastatements = set([y.lower()])
            if spssweight:
                cmd.append(gendata(spssweight, vardict, alldatastatements))

            # loop over one or more x variables for this chart
            for currentn, x in enumerate(xpart):
                xobj = vardict[x]
                ml = xobj.VariableLevel
                if numcharts > 1:
                    cmd.append(
                        graphstarttemplate % {
                            "originandscale":
                            scaling(numcharts, currentn, indentx, yscale)
                        })
                if boxplots and ml == "scale":
                    cmd.append(
                        graphstarttemplate %
                        {"originandscale": "origin(15%, 10%), scale(75%,75%)"})
                if ml == "scale":  # build scatterplot specs
                    uc = ""
                    options = ""
                    if size:
                        options = options + ", size(%s)" % size
                        cmd.append(gendata(size, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "size"})
                    if color:
                        options = options + ", color.exterior(%s)" % color
                        cmd.append(gendata(color, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "color.exterior"})
                    if shape:
                        if vardict[shape].VariableLevel == "scale":
                            raise ValueError(
                                _("The shape variable must be categorical: %s")
                                % shape)
                        options = options + ", shape(%s)" % shape
                        cmd.append(gendata(shape, vardict, alldatastatements))
                        if numcharts > 1:
                            cmd.append(aesth % {"atype": "shape"})
                else:
                    uc = iscat
                    if categorical == "bars":
                        cmd.append(include0)
                if not first:
                    other = ", null()"
                else:
                    other = ""
                if title and numcharts == 1 and not boxplots:
                    cmd.append(titletemplate % {"title": title})
                cmd.append(gendata(x, vardict, alldatastatements))
                if label:
                    cmd.append(gendata(label, vardict, alldatastatements))
                #cmd.append(datatemplate % {"varname": x, "unitcategory": uc})
                cmd.append(guidetemplate % {
                    "dim": 1,
                    "varlabel": xobj.VariableLabel or x,
                    "other": ""
                })
                # only the first chart of a row gets a y axis guide
                if first:
                    cmd.append(guidetemplate % {
                        "dim": 2,
                        "varlabel": yvarlabel,
                        "other": other
                    })
                else:
                    cmd.append(noyaxis)
                if ml == "scale":
                    if label:
                        # NOTE(review): this fragment ends in "))" while the
                        # boxplot branch below uses "label(%s)" - confirm the
                        # scatter templates expect the extra parenthesis.
                        options = options + ", label(%s))" % label
                    if hexbin:
                        cmd.append(hexbinscatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    else:
                        cmd.append(scatterelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                    # one fit line element per requested fit type
                    for i, fittype in enumerate(
                        [linear, quadratic, cubic, loess]):
                        if fittype:
                            if applyfitto == "group":
                                colorspec = ", color(%s)" % color
                            else:
                                colorspec = ""
                            if numcharts > 1:
                                cmd.append(aesth % {"atype": "color"})
                            cmd.append(fitlineelement % \
                                {"fittype": fittypekwd[i], "y": y, "x": x, "lineshape" : lineshapes[i], "color" : colorspec})
                    if boxplots:  # bordered boxplot if single variable chart
                        # close the scatterplot graph, then add one graph
                        # with a boxplot of x and one with a boxplot of y
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(15%, 0%), scale(75%,8%)"
                        })
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append("""COORD: rect(dim(1))""")
                        cmd.append(oneboxplotelement % {"variable": x})
                        cmd.append(graphendtemplate)
                        cmd.append(graphstarttemplate % {
                            "originandscale":
                            "origin(92%, 10%), scale(8%, 75%)"
                        })
                        cmd.append("COORD: transpose(rect(dim(1)))")
                        cmd.append("""GUIDE: axis(dim(1), ticks(null()))""")
                        cmd.append(oneboxplotelement % {"variable": y})
                        cmd.append(graphendtemplate)

                else:
                    # categorical x: bar/other element, or a boxplot
                    if categorical != "boxplot":
                        cmd.append(categoricalelement % {
                            "etype": elementmap[categorical],
                            "y": y,
                            "x": x
                        })
                    else:
                        if label:
                            options = ", label(%s)" % label
                        else:
                            options = ""
                        cmd.append(boxplotelement % {
                            "y": y,
                            "x": x,
                            "options": options
                        })
                first = False
                if numcharts > 1:
                    cmd.append(graphendtemplate)
            if npage == 0:
                cmd.append(pageendtemplate)
            cmd.append(endgpl)
            # the statements are submitted together as one list
            spss.Submit(cmd)
Beispiel #18
0
def weightedkappaextension(variables, wttype=1, cilevel=95):
    """Compute a weighted kappa for two rating variables and display it.

    The work is done on a hidden copy of the active dataset: an active
    filter is turned into a SELECT IF, split-file status is switched off,
    and cases with a missing value on either variable are dropped.  The
    statistic itself is computed by an SPSS MATRIX program and shown in a
    "Weighted Kappa" pivot table (kappa, asymptotic standard error, Z,
    p value and confidence bounds).

    Parameters:
        variables: variable names, expanded through expandvarnames();
            exactly two valid variables are required.
        wttype: 1 for linear weights, 2 for quadratic weights.  Any other
            value is reset to 1 and reported in a warning table.
        cilevel: confidence level in percent.  Values below 50 or above
            99.999 are clamped to those limits and reported in a warning.

    Instead of raising, the function displays a one-cell Warnings table and
    stops when: a variable is invalid, the number of variables is not two,
    fewer than two complete cases remain, one rater has zero standard
    deviation, a rating is not an integer, or a rating is below 1.

    Returns None.  Temporary datasets/files are removed and the SPSS
    settings saved by PRESERVE are restored in all cases.
    """

    def _warn(text):
        """Show text in a one-cell Warnings table of this procedure."""
        spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(text)
        spss.EndProcedure()

    def _quiet_warn(text, omstag):
        """Show a warning table while OMS hides the procedure's Notes table."""
        spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag)
        _warn(text)
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag)

    def _cursor():
        """Open a read cursor, falling back when isBinary is not accepted."""
        try:
            return spss.Cursor(isBinary=False)
        except Exception:
            return spss.Cursor()

    def _tmpname(prefix):
        """Return a random name such as 'V0.4711' for temporary objects."""
        return prefix + str(random.uniform(.1, 1))

    def _filter_condition(showtext):
        """Return the SHOW FILTER text without its "(FILTER)" marker.

        str.strip("(FILTER)") would remove any of those *characters* from
        both ends and so could eat the first letters of the filter variable
        name; only the marker itself is removed here.
        """
        marker = "(FILTER)"
        text = showtext.strip()
        if text.endswith(marker):
            return text[:-len(marker)]
        if text.startswith(marker):
            return text[len(marker):]
        return text

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _warn(
            _("""An invalid variable has been specified. This command is not executed."""
              ))
        return
    if len(varnames) != 2:
        _warn(
            _("""Exactly two variables must be specified. This command is not executed."""
              ))
        return

    # Built only after validation: indexing varnames[1] earlier raised
    # IndexError for a single variable and hid the warning above.
    caption = varnames[0] + _(" vs. ") + varnames[1]

    try:
        warntext = []
        if cilevel < 50:
            warntext.append(
                _("CILEVEL cannot be less than 50%. It has been set to 50%."
                  ))
            cilevel = 50
        if cilevel > 99.999:
            warntext.append(
                _("CILEVEL cannot be greater than 99.999%. It has been set to 99.999%."
                  ))
            cilevel = 99.999
        # Show whole-number levels without a trailing ".0" in the labels.
        if cilevel == int(cilevel):
            cilevel = int(cilevel)
        if wttype not in (1, 2):
            warntext.append(
                _("WTTYPE must be 1 or 2. It has been set to 1."))
            wttype = 1
        varlist = varnames[0] + ' ' + varnames[1]
        spss.Submit("PRESERVE.")
        tempdir = tempfile.gettempdir()
        spss.Submit("""CD "%s".""" % tempdir)
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            # NOTE(review): this runs before the working copy is created,
            # so the weight variable of the caller's dataset is rounded.
            spss.Submit(r"""
COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
            spss.Submit(r"""
EXECUTE.""")
        maxloops = 2 * spss.GetCaseCount()
        spss.Submit("""SET PRINTBACK=OFF MPRINT=OFF MXLOOPS=%s.""" %
                    maxloops)
        activeds = spss.ActiveDataset()
        if activeds == "*":
            # An unnamed dataset cannot be re-activated later; name it.
            activeds = _tmpname("D")
            spss.Submit("DATASET NAME %s" % activeds)
        (tmpvar1, tmpvar2, tmpvar3,
         tmpvar4, tmpvar5, tmpvar6) = [_tmpname("V") for i in range(6)]
        tmpdata1, tmpdata2 = [_tmpname("D") for i in range(2)]
        (omstag1, omstag2, omstag3,
         omstag4, omstag5, omstag6) = [_tmpname("T") for i in range(6)]
        tmpfile1, tmpfile2 = [_tmpname("F") for i in range(2)]
        lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
        upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel
        spss.Submit(r"""
DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)

        # Make an active filter permanent in the working copy.
        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            filtcond = _filter_condition(filt)
            select = "SELECT IF " + str(filtcond) + "."
            spss.Submit("""%s""" % select)
            spss.Submit("""EXECUTE.""")
            spss.Submit("""USE ALL.""")
        splitinfo = spssaux.getDatasetInfo(Info="SplitFile")
        if splitinfo != "":
            warntext.append(_("This procedure ignores split file status."))
            spss.Submit(r"""SPLIT FILE OFF.""")

        # Keep only cases that are complete on both rating variables.
        spss.Submit(r"""
COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
        spss.Submit(r"""
SELECT IF %s=0.""" % tmpvar1)
        spss.Submit(r"""
EXECUTE.""")
        validn = spss.GetCaseCount()
        if validn < 2:
            _quiet_warn(
                _("""There are too few complete cases. This command is not executed."""
                  ), omstag1)
            return

        # Standard deviations are appended as the last two variables.
        spss.Submit(r"""
AGGREGATE
   /OUTFILE=* MODE=ADDVARIABLES
   /%s=SD(%s)
   /%s=SD(%s).""" % (tmpvar2, varnames[0], tmpvar3, varnames[1]))
        cur = _cursor()
        datarow = cur.fetchone()
        cur.close()
        sd1 = datarow[-2]
        sd2 = datarow[-1]
        if min(sd1, sd2) == 0:
            _quiet_warn(
                _("""All ratings are the same for at least one rater. This command is not executed."""
                  ), omstag1)
            return

        if warntext:
            # One table with all collected (non-fatal) warnings.
            _quiet_warn(" \n".join(warntext), omstag1)

        spss.Submit(r"""
DELETE VARIABLES %s %s.""" % (tmpvar2, tmpvar3))
        # Cell counts of the two-way rating table.
        spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile1, varlist, tmpvar4))
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Variables to Cases'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag2)
        # Stack both raters' values into one variable to list all categories.
        spss.Submit(r"""
VARSTOCASES
  /MAKE %s FROM %s.""" % (tmpvar5, varlist))
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag2)
        catdata = []
        cur = _cursor()
        while True:
            datarow = cur.fetchone()
            if datarow is None:
                break
            catdata.append(datarow[-1])
        cur.close()
        cats = list(set(catdata))
        # cattest/cats are read again by the cleanup code in "finally".
        cattest = 0
        if any(item != round(item) for item in cats):
            cattest = 1
            _quiet_warn(
                _("""Some ratings are not integers. This command is not executed."""
                  ), omstag1)
            return
        if min(cats) < 1.0:
            _quiet_warn(
                _("""Some ratings are less than 1. This command is not executed."""
                  ), omstag1)
            return

        spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile2, tmpvar5, tmpvar6))
        spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN""" % tmpdata2)
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Matrix'] 
 /DESTINATION VIEWER=NO
 /TAG='"%s"'.""" % omstag3)
        # The MATRIX program rebuilds the square rating table, builds the
        # weight matrix for the chosen weighting and saves the statistics;
        # -1 marks values that could not be computed.
        spss.Submit(r"""
MATRIX.
GET x 
  /FILE=%s
  /VARIABLES=%s %s.
GET ratecats
  /FILE=%s
  /VARIABLES=%s.
COMPUTE size=MMAX(ratecats).
COMPUTE y=MAKE(size,size,0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+   LOOP k=1 to NROW(x).
+     DO IF (x(k,1)=i and x(k,2)=j).
+       COMPUTE y(i,j)=x(k,3).
+     END IF.
+   END LOOP.
+ END LOOP.
END LOOP.
COMPUTE wttype=%s.
COMPUTE wt=MAKE(NROW(y),NCOL(y),0).
LOOP i=1 to NROW(y).
+ LOOP j=1 to NCOL(y).
+   DO IF wttype=1.
+     COMPUTE wt(i,j)=1-(ABS(i-j)/(size-1)).
+   ELSE IF wttype=2.
+     COMPUTE wt(i,j)=1-((i-j)/(NROW(y)-1))**2.
+   END IF.
+ END LOOP.
END LOOP.
COMPUTE n=MSUM(y).
COMPUTE prop=y/n.
COMPUTE p_i=RSUM(prop).
COMPUTE p_j=CSUM(prop).
COMPUTE w_i=(wt*T(p_j))*MAKE(1,size,1).
COMPUTE w_j=MAKE(size,1,1)*(T(p_i)*wt).
COMPUTE po=MSUM(wt&*prop).
COMPUTE pe=MSUM(MDIAG(p_i)*wt*MDIAG(p_j)).
COMPUTE kstat=(po-pe)/(1-pe).
COMPUTE var0=(T(p_i)*((wt-(w_i+w_j))&**2)*T(p_j)-pe**2)/(n*(1-pe)**2).
DO IF var0>=0.
+ COMPUTE ase0=SQRT(var0).
ELSE.
+ COMPUTE ase0=-1.
END IF.
DO IF ase0>0.
+ COMPUTE z=kstat/ase0.
+ COMPUTE sig=1-CHICDF(z**2,1).
ELSE.
+ COMPUTE z=-1.
+ COMPUTE sig=-1.
END IF.
COMPUTE var1=(MSUM((prop&*((wt-(w_i+w_j)&*(1-kstat))&**2)))-(kstat-pe*(1-kstat))**2)/(n*(1-pe)**2).
DO IF var1>=0.
+ COMPUTE ase1=SQRT(var1).
ELSE.
+ COMPUTE ase1=-1.
END IF.
SAVE {wttype,kstat,ase1,z,sig,ase0}
   /OUTFILE=%s
   /VARIABLES=wttype,kstat,ase1,z,sig,ase0.
END MATRIX.""" % (tmpfile1, varlist, tmpvar4, tmpfile2, tmpvar5, wttype,
                  tmpdata2))
        spss.Submit(r"""
OMSEND TAG=['"%s"'].""" % omstag3)
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata2)
        # Turn the -1 markers into system-missing and add the CI bounds.
        spss.Submit(r"""
DO IF ase0=-1.
+ RECODE z sig (-1=SYSMIS).
END IF.
EXECUTE.
DELETE VARIABLES ase0.
RECODE ase1 (-1=SYSMIS).
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase1.""" % cilevel)
        spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase1.""" % cilevel)
        # The variable labels double as row/column labels of the result table.
        spss.Submit(r"""
FORMATS kstat ase1 z sig lower upper (F11.3).
VARIABLE LABELS kstat %s.""" % _smartquote(_("""Kappa""")))
        spss.Submit(r"""
VARIABLE LABELS ase1 %s.""" % _smartquote(_("""Asymptotic Standard Error""")))
        spss.Submit(r"""
VARIABLE LABELS z %s.""" % _smartquote(_("""Z""")))
        spss.Submit(r"""
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
        spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
        spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
        if wttype == 1:
            spss.Submit(r"""
VARIABLE LABELS wttype %s.""" % _smartquote(_("""Linear""")))
        if wttype == 2:
            spss.Submit(r"""
VARIABLE LABELS wttype %s.""" % _smartquote(_("""Quadratic""")))
        spss.Submit(r"""
EXECUTE.
""")
        # omstag4/omstag5 stay active until the cleanup code ends them.
        spss.Submit(r"""
OMS
  /SELECT TABLES 
  /IF COMMANDS=['Weighted Kappa'] SUBTYPES=['Notes']
  /DESTINATION VIEWER=NO
  /TAG = '"%s"'.""" % omstag4)
        spss.Submit(r"""
OMS
  /SELECT TEXTS
  /IF COMMANDS=['Weighted Kappa'] LABELS=['Active Dataset']
  /DESTINATION VIEWER=NO
  /TAG = '"%s"'.""" % omstag5)
        if warntext:
            spss.Submit(r"""
OMS
 /SELECT HEADINGS
 /IF COMMANDS=['Weighted Kappa']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag6)
        cur = _cursor()
        data = cur.fetchone()
        cur.close()
        spss.StartProcedure(_("Weighted Kappa"), "Weighted Kappa")
        table = spss.BasePivotTable(_("Weighted Kappa"),
                                    "Kappa",
                                    caption=caption)
        # Variable 0 is wttype (row label); variables 1-6 are the statistics.
        table.SimplePivotTable(
            rowdim=_("Weighting"),
            rowlabels=[CellText.String(spss.GetVariableLabel(0))],
            coldim="",
            collabels=[spss.GetVariableLabel(i) for i in range(1, 7)],
            cells=[data[i] for i in range(1, 7)])
        spss.EndProcedure()
        if warntext:
            spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag6)
    finally:
        # Best-effort cleanup: names may be unbound if the body failed
        # early, so any error here is deliberately ignored.
        try:
            spss.Submit(r"""
DATASET CLOSE %s.""" % tmpdata1)
            spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % activeds)
            if validn >= 2 and min(sd1, sd2) > 0:
                if cattest == 0 and min(cats) >= 1:
                    spss.Submit(r"""
OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
                    spss.Submit(r"""
DATASET CLOSE %s.""" % tmpdata2)
                    spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile2)
                spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile1)
        except Exception:
            pass
        spss.Submit(r"""
RESTORE.
""")
def fleisskappaextension(variables, cilevel=95):
    """Compute Fleiss' kappa for two or more rating variables and display it.

    The work is done on a hidden copy of the active dataset: an active
    filter is turned into a SELECT IF, split-file status is switched off,
    and cases with a missing value on any rating variable are dropped.
    When a weight variable is active, each case is written to the working
    file as many times as its (rounded) weight.  An SPSS MATRIX program
    computes the statistics, which are shown in two pivot tables:
    "Overall Kappa" and "Kappas for Individual Categories".

    Parameters:
        variables: variable names, expanded through expandvarnames();
            at least two valid variables are required.
        cilevel: confidence level in percent.  Values below 50 or above
            99.999 are clamped to those limits and reported in a warning.

    Instead of raising, the function displays a one-cell Warnings table and
    stops when: a variable is invalid, fewer than two variables are given,
    there are fewer complete cases than max(categories, raters), or all
    ratings are identical.

    Returns None.  Temporary datasets/files are removed and the SPSS
    settings saved by PRESERVE are restored in all cases.
    """

    def _warn(text):
        """Show text in a one-cell Warnings table of this procedure."""
        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
        table = spss.BasePivotTable("Warnings ", "Warnings")
        table.Append(spss.Dimension.Place.row, "rowdim", hideLabels=True)
        rowLabel = CellText.String("1")
        table[(rowLabel, )] = CellText.String(text)
        spss.EndProcedure()

    def _suppress_notes(omstag):
        """Start an OMS request that hides this procedure's Notes table."""
        spss.Submit(r"""
OMS
 /SELECT TABLES
 /IF COMMANDS=['Fleiss Kappa'] SUBTYPES=['Notes']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag)

    def _quiet_warn(text, omstag):
        """Show a warning table with the Notes table hidden."""
        _suppress_notes(omstag)
        _warn(text)
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag)

    def _cursor():
        """Open a read cursor, falling back when isBinary is not accepted."""
        try:
            return spss.Cursor(isBinary=False)
        except Exception:
            return spss.Cursor()

    def _tmpname(prefix):
        """Return a random name such as 'V0.4711' for temporary objects."""
        return prefix + str(random.uniform(.1, 1))

    def _filter_condition(showtext):
        """Return the SHOW FILTER text without its "(FILTER)" marker.

        str.strip("(FILTER)") would remove any of those *characters* from
        both ends and so could eat the first letters of the filter variable
        name; only the marker itself is removed here.
        """
        marker = "(FILTER)"
        text = showtext.strip()
        if text.endswith(marker):
            return text[:-len(marker)]
        if text.startswith(marker):
            return text[len(marker):]
        return text

    varnames = expandvarnames(variables)
    vardict = spssaux.VariableDict(varnames)
    if len(vardict) != len(varnames):
        _warn(
            _("""An invalid variable has been specified. This command is not executed."""
              ))
        return
    if len(varnames) < 2:
        _warn(
            _("""At least two variables must be specified. This command is not executed."""
              ))
        return

    try:
        warntext = []
        if cilevel < 50:
            warntext.append(
                _("CILEVEL cannot be less than 50%. It has been reset to 50%."
                  ))
            cilevel = 50
        if cilevel > 99.999:
            warntext.append(
                _("CILEVEL cannot be greater than 99.999%. It has been reset to 99.999%."
                  ))
            cilevel = 99.999
        # Show whole-number levels without a trailing ".0" in the labels.
        if cilevel == int(cilevel):
            cilevel = int(cilevel)
        varlist = ' '.join(varnames)
        spss.Submit("PRESERVE.")
        tempdir = tempfile.gettempdir()
        spss.Submit("""CD "%s".""" % tempdir)
        wtvar = spss.GetWeightVar()
        if wtvar is not None:
            # NOTE(review): this runs before the working copy is created,
            # so the weight variable of the caller's dataset is rounded.
            spss.Submit(r"""
COMPUTE %s=RND(%s).""" % (wtvar, wtvar))
            spss.Submit(r"""
EXECUTE.""")
            wtdn = GetWeightSum(varnames)
        else:
            wtdn = spss.GetCaseCount()
        maxloops = wtdn + 1
        spss.Submit(
            """SET PRINTBACK=OFF MPRINT=OFF OATTRS=ENG MXLOOPS=%s.""" %
            maxloops)
        activeds = spss.ActiveDataset()
        if activeds == "*":
            # An unnamed dataset cannot be re-activated later; name it.
            activeds = _tmpname("D")
            spss.Submit("DATASET NAME %s" % activeds)
        tmpvar1, tmpvar2, tmpvar3 = [_tmpname("V") for i in range(3)]
        tmpfile1, tmpfile2 = [_tmpname("F") for i in range(2)]
        tmpdata1, tmpdata2, tmpdata3 = [_tmpname("D") for i in range(3)]
        (omstag1, omstag2, omstag3,
         omstag4, omstag5, omstag6) = [_tmpname("T") for i in range(6)]
        lowlabel = _("""Lower %s%% Asymptotic CI Bound""") % cilevel
        upplabel = _("""Upper %s%% Asymptotic CI Bound""") % cilevel
        spss.Submit(r"""
DATASET COPY %s WINDOW=HIDDEN.""" % tmpdata1)
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata1)

        # Make an active filter permanent in the working copy.
        filt = spssaux.GetSHOW("FILTER", olang="english")
        if filt != "No case filter is in effect":
            filtcond = _filter_condition(filt)
            select = "SELECT IF " + str(filtcond) + "."
            spss.Submit("""%s""" % select)
            spss.Submit("""EXECUTE.""")
            spss.Submit("""USE ALL.""")
        splitinfo = spssaux.getDatasetInfo(Info="SplitFile")
        if splitinfo != "":
            warntext.append(_("This command ignores split file status."))
            spss.Submit(r"""SPLIT FILE OFF.""")

        # Keep only cases that are complete on every rating variable.
        spss.Submit(r"""
COUNT %s=%s (MISSING).""" % (tmpvar1, varlist))
        spss.Submit(r"""
SELECT IF %s=0.""" % tmpvar1)
        spss.Submit(r"""
EXECUTE.
MISSING VALUES ALL ().""")
        validn = spss.GetCaseCount()

        # Write the rating data to tmpfile1; with weighting on, each case
        # is written once per unit of its weight.
        if wtvar is None:
            spss.Submit(r"""
SAVE OUTFILE=%s.""" % tmpfile1)
        else:
            spss.Submit(r"""
DO IF %s >= 1.""" % wtvar)
            spss.Submit(r"""
+ LOOP #i=1 TO %s.""" % wtvar)
            spss.Submit(r"""
XSAVE OUTFILE=%s
  /KEEP=%s
  /DROP=%s.""" % (tmpfile1, varlist, wtvar))
            spss.Submit(r"""
+ END LOOP.
END IF.
EXECUTE.
""")
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Variables to Cases'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag1)
        # Stack all raters' values into one variable to list all categories.
        spss.Submit(r"""
VARSTOCASES
  /MAKE %s FROM %s.""" % (tmpvar2, varlist))
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag1)
        catdata = []
        cur = _cursor()
        while True:
            datarow = cur.fetchone()
            if datarow is None:
                break
            catdata.append(datarow[-1])
        cur.close()
        cats = list(set(catdata))
        # ncats/neededn are read again by the cleanup code in "finally".
        ncats = len(cats)
        nraters = len(varnames)
        neededn = max(ncats, nraters)
        if validn < neededn:
            _quiet_warn(
                _("""There are too few complete cases. This command is not executed."""
                  ), omstag2)
            return
        if ncats < 2:
            _quiet_warn(
                _("""All ratings are the same. This command is not executed."""
                  ), omstag2)
            return

        if warntext:
            # One table with all collected (non-fatal) warnings.
            _quiet_warn(" \n".join(warntext), omstag2)

        # Distinct rating categories, one row each.
        spss.Submit(r"""
AGGREGATE
  /OUTFILE=%s
  /BREAK=%s
  /%s=N.""" % (tmpfile2, tmpvar2, tmpvar3))
        spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata2)
        spss.Submit(r"""
DATASET DECLARE %s WINDOW=HIDDEN.""" % tmpdata3)
        spss.Submit(r"""
OMS /SELECT ALL EXCEPT=WARNINGS 
 /IF COMMANDS=['Matrix'] 
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag3)
        # The MATRIX program counts, per case, how many raters chose each
        # category, then saves the overall statistics to tmpdata2 and the
        # per-category statistics to tmpdata3.
        spss.Submit(r"""
MATRIX.
GET x 
  /FILE=%s
  /VARIABLES=%s.
GET ratecats
  /FILE=%s
  /VARIABLES=%s.
COMPUTE n=NROW(x).
COMPUTE c=NROW(ratecats).
COMPUTE y=MAKE(n,c,0).
LOOP i=1 to n.
+ LOOP j=1 to NCOL(x).
+   LOOP k=1 to c.
+     DO IF x(i,j)=ratecats(k).
+       COMPUTE y(i,k)=y(i,k)+1.
+     END IF.
+   END LOOP.
+ END LOOP.
END LOOP.
COMPUTE k=NCOL(x).
COMPUTE pe=MSUM((CSUM(y)/MSUM(y))&**2).
COMPUTE pa=MSSQ(y)/(NROW(y)*k*(k-1))-(1/(k-1)).
COMPUTE kstat=(pa-pe)/(1-pe).
COMPUTE cp=(CSSQ(y)-CSUM(y))&/((k-1)&*CSUM(y)).
COMPUTE pj=CSUM(y)/MSUM(y).
COMPUTE one=MAKE(1,NCOL(pj),1).
COMPUTE qj=one-pj.
COMPUTE kj=(cp-pj)&/qj.
COMPUTE num=2*((pj*t(qj))**2-MSUM(pj&*qj&*(qj-pj))).
COMPUTE den=n*k*(k-1)*((pj*t(qj))**2).
COMPUTE ase=SQRT(num/den).
COMPUTE z=kstat/ase.
COMPUTE sig=1-CHICDF(z**2,1).
SAVE {kstat,ase,z,sig}
   /OUTFILE=%s
   /VARIABLES=kstat,ase,z,sig.
COMPUTE asej=MAKE(1,c,SQRT(2/(n*k*(k-1)))).
COMPUTE zj=kj&/asej.
COMPUTE sigj=one-CHICDF(zj&**2,1).
SAVE {ratecats,t(cp),t(kj),t(asej),t(zj),t(sigj)}
  /OUTFILE=%s
  /VARIABLES=category,cp,kstat,ase,z,sig.
END MATRIX.""" % (tmpfile1, varlist, tmpfile2, tmpvar2, tmpdata2, tmpdata3))
        spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag3)

        # --- overall statistics (tmpdata2) ---
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata2)
        spss.Submit(r"""
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
        spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
        # The variable labels double as column labels of the result table.
        spss.Submit(r"""
FORMATS kstat ase z sig lower upper (F11.3).
VARIABLE LABELS kstat %s. """ % _smartquote(_("""Kappa""")))
        spss.Submit(r"""
VARIABLE LABELS ase %s. """ % _smartquote(_("""Asymptotic Standard Error""")))
        spss.Submit(r"""
VARIABLE LABELS z %s. """ % _smartquote(_("""Z""")))
        spss.Submit(r"""
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
        spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
        spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
        spss.Submit(r"""
EXECUTE.
""")
        cur = _cursor()
        data1 = cur.fetchone()
        cur.close()
        collabels1 = [spss.GetVariableLabel(i) for i in range(6)]
        celldata1 = [data1[i] for i in range(6)]

        # --- per-category statistics (tmpdata3) ---
        spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % tmpdata3)
        spss.Submit(r"""
COMPUTE lower=kstat-SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
        spss.Submit(r"""
COMPUTE upper=kstat+SQRT(IDF.CHISQUARE(%s/100,1))*ase.""" % cilevel)
        spss.Submit(r"""
FORMATS category (F10.0) cp kstat ase z sig lower upper (F11.3).
VARIABLE LABELS category %s. """ % _smartquote(_("""Rating Category""")))
        spss.Submit(r"""
VARIABLE LABELS cp %s. """ % _smartquote(_("""Conditional Probability""")))
        spss.Submit(r"""
VARIABLE LABELS kstat %s. """ % _smartquote(_("""Kappa""")))
        spss.Submit(r"""
VARIABLE LABELS ase %s. """ % _smartquote(_("""Asymptotic Standard Error""")))
        spss.Submit(r"""                
VARIABLE LABELS z %s. """ % _smartquote(_("""Z""")))
        spss.Submit(r""" 
VARIABLE LABELS sig %s. """ % _smartquote(_("""P Value""")))
        spss.Submit(r"""
VARIABLE LABELS lower %s. """ % _smartquote(_(lowlabel)))
        spss.Submit(r"""
VARIABLE LABELS upper %s. """ % _smartquote(_(upplabel)))
        spss.Submit(r""" 
EXECUTE.""")
        # omstag4/omstag5 stay active until the cleanup code ends them.
        _suppress_notes(omstag4)
        spss.Submit(r"""
OMS
 /SELECT TEXTS
 /IF COMMANDS=['Fleiss Kappa'] LABELS=['Active Dataset']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag5)
        if warntext:
            spss.Submit(r"""
OMS
 /SELECT HEADINGS
 /IF COMMANDS=['Fleiss Kappa']
 /DESTINATION VIEWER=NO
 /TAG = '"%s"'.""" % omstag6)

        # First value of each row is the category (row label); the rest
        # are the table cells, collected row by row into one flat list.
        rlabels = []
        data2 = []
        cur = _cursor()
        for i in range(0, spss.GetCaseCount()):
            datarow = cur.fetchone()
            data2.extend(datarow[1:])
            rlabels.append(datarow[0])
        cur.close()

        spss.StartProcedure(_("Fleiss Kappa"), "Fleiss Kappa")
        table1 = spss.BasePivotTable(_("Overall Kappa"),
                                     "Overall Kappa")
        # rowdim is a plain "": passing the empty string through gettext
        # returns the catalog header instead of an empty title.
        table1.SimplePivotTable(rowdim="",
                                rowlabels=[CellText.String("Overall")],
                                coldim="",
                                collabels=collabels1,
                                cells=celldata1)
        if any(item != round(item) for item in rlabels):
            caption = (_(
                "Non-integer rating category values are truncated for presentation."
            ))
        else:
            caption = ("")
        table2 = spss.BasePivotTable(
            _("Kappas for Individual Categories"),
            _("Individual Category Kappa Statistics"),
            caption=caption)
        rowlabels = [CellText.String("{:>9.0f}".format(label))
                     for label in rlabels]
        # Variable 0 is the category; variables 1-7 are the statistics.
        collabels = [spss.GetVariableLabel(i) for i in range(1, 8)]
        table2.SimplePivotTable(rowdim=_("  Rating Category"),
                                rowlabels=rowlabels,
                                coldim="",
                                collabels=collabels,
                                cells=data2)
        spss.EndProcedure()
        if warntext:
            spss.Submit(r"""
OMSEND TAG = ['"%s"'].""" % omstag6)
    finally:
        # Best-effort cleanup: names may be unbound if the body failed
        # early, so any error here is deliberately ignored.
        try:
            spss.Submit("""
DATASET CLOSE %s.""" % tmpdata1)
            spss.Submit(r"""
DATASET ACTIVATE %s WINDOW=ASIS.""" % activeds)
            if validn >= neededn and ncats >= 2:
                spss.Submit("""
OMSEND TAG=['"%s"' '"%s"'].""" % (omstag4, omstag5))
                spss.Submit("""
DATASET CLOSE %s.""" % tmpdata2)
                spss.Submit("""
DATASET CLOSE %s.""" % tmpdata3)
                spss.Submit(r"""
ERASE FILE=%s.""" % tmpfile2)
        except Exception:
            pass
        # tmpfile1 is written before any of the early exits, so it is
        # erased separately instead of only on the success path.
        try:
            spss.Submit("""
ERASE FILE=%s.""" % tmpfile1)
        except Exception:
            pass
        spss.Submit("""
RESTORE.
""")
Beispiel #20
0
def recode(varlist,
           recodes,
           stringsize=None,
           makevaluelabels=True,
           copyvariablelabels=True,
           useinputvallabels=False,
           suffix="",
           prefix=""):
    """Recode the input variables into the output variables and label the results.

    varlist is the variable specification; parsevarlist turns it into the
    input names, the output names, and the variable type.
    recodes is a list that may hold only one quoted recode specification,
    which is parsed by parserecodes.
    stringsize, if given, is the A-format width for the output variables:
    outputs whose dictionary lookup fails are declared with STRING, and
    outputs whose VariableType differs from stringsize get ALTER TYPE.
    makevaluelabels requests VALUE LABELS syntax for the output variables.
    copyvariablelabels requests a VARIABLE LABEL for each output built from
    the matching input variable's label wrapped in prefix and suffix.
    useinputvallabels passes the value labels of the first input variable
    to makevallabels to build the label definitions.
    suffix and prefix are the texts placed around a copied variable label;
    a separating blank is added when one is not already present.

    Raises ValueError if recodes holds more than one specification.
    Any message returned by checklabelconsistency is printed at the end."""

    vardict = spssaux.VariableDict(caseless=True)
    isutf8 = spss.PyInvokeSpss.IsUTF8mode()
    ecutf8 = codecs.getencoder("utf_8")
    inputlist, outputlist, vartype = parsevarlist(varlist, vardict)
    if len(recodes) > 1:
        raise ValueError(
            _("The RECODES subcommand must consist of a single, quoted specification"
              ))
    # recodespec is a list of textual recode syntax, one item per value set
    # vldefs is a dictionary with keys the target values
    # and values the input codes
    # inputdict is a dictionary with keys the target values
    # and values a list of the input codes
    recodespec, vldefs, inputdict = parserecodes(recodes[0], vartype,
                                                 stringsize)
    valuelabelmessage = checklabelconsistency(inputlist, vardict)

    if stringsize:
        alter = []
        create = []
        for v in outputlist:
            # A failed lookup is taken to mean the output variable does not
            # exist yet.  Only ordinary exceptions are treated that way so
            # that KeyboardInterrupt/SystemExit still propagate.
            try:
                existingtype = vardict[v].VariableType
            except Exception:
                create.append(v)
            else:
                if existingtype != stringsize:
                    alter.append(v)
        if create:
            spss.Submit("STRING %s (A%s)." % (" ".join(create), stringsize))
        if alter:
            spss.Submit("ALTER TYPE %s (A%s)" % (" ".join(alter), stringsize))

    spss.Submit(
        """RECODE %s %s INTO %s.""" %
        (" ".join(inputlist), " ".join(recodespec), " ".join(outputlist)))

    # generate variable labels if requested
    if copyvariablelabels:
        if prefix and not prefix.endswith(" "):
            prefix = prefix + " "
        if suffix and not suffix.startswith(" "):
            suffix = " " + suffix
        for vin, vout in zip(inputlist, outputlist):
            newlabel = prefix + vardict[vin].VariableLabel + suffix
            spss.Submit("""VARIABLE LABEL %s %s.""" %
                        (vout, _smartquote(newlabel, True)))

    # generate value labels if requested
    # all values for given target are merged but else clause is omitted
    # vldefs is a dictionary with keys of the output values and
    # values a string listing the input values.  If copying value labels
    # the first input variable is used as the source.
    if makevaluelabels:
        if useinputvallabels:
            vldefs = makevallabels(vldefs, inputdict,
                                   vardict[inputlist[0]].ValueLabels, isutf8,
                                   ecutf8)

        # ensure that copy as target does not generate a value label
        # (collect the keys first: the dictionary cannot shrink while iterated)
        for target in [t for t in vldefs if t.lower() == "copy"]:
            del vldefs[target]

        # the target value is emitted as stored in vldefs; only the label
        # text goes through _smartquote
        labelspec = " ".join(val + " " + _smartquote(label, True)
                             for val, label in vldefs.items())
        spss.Submit(r"""VALUE LABELS %s %s.""" %
                    (" ".join(outputlist), labelspec))
    if valuelabelmessage:
        print(valuelabelmessage)
Beispiel #21
0
def buildspec(dims, dss, catvars, totvars, encoding, finalweight):
    """create raking specification and return control variable list and totals list

    dims is a list of dimension specifications.  Each non-empty item is a
    sequence holding the control variable name followed by alternating
    category values and control totals; a total may be a numerical expression.
    dss, catvars, and totvars are an alternative way of specifying the same
    information: dss is a list of dataset names, catvars a list of category
    variable names, and totvars a list of the corresponding control totals
    variables.  The three lists are read in parallel by position.
    encoding is used to decode a control variable name that is not a str
    so that it can appear in error messages.
    finalweight is the name for the weight variable; it must not already
    exist in the active dataset.

    Returns (ctlvars, ctltotals) where ctlvars is the list of control variable
    names and ctltotals is a parallel list of dictionaries mapping each
    category value (float) to its control total (float).

    Raises ValueError for an existing finalweight, an unknown or nonnumeric
    control variable, a malformed value/total list, a dataset given without
    both variable names, no specifications at all, or duplicate control
    variables.  Errors raised while reading a totals dataset are re-raised
    after the original dataset has been reactivated."""

    vardict = spssaux.VariableDict()
    if finalweight in vardict:
        raise ValueError(_("FINALWEIGHT cannot specify an existing variable name"))
    ctlvars = []
    ctltotals = []
    activedsname = spss.ActiveDataset()
    if activedsname == "*": #unnamed
        # give the dataset a name so that it can be reactivated later
        activedsname = "D" + str(random.uniform(.1,1))
        spss.Submit("DATASET NAME %s" % activedsname)

    for dim in dims:
        if not dim:
            continue
        ctlvar = dim[0]
        if not isinstance(ctlvar, str):
            vvname = str(ctlvar, encoding)
        else:
            vvname = ctlvar
        if ctlvar not in vardict:
            raise ValueError(_("A control total variable does not exist: %s") % vvname)
        if vardict[ctlvar].VariableType != 0:
            raise ValueError(_("A nonnumeric variable was specified for a control dimension: %s") % vvname)
        # name plus value/total pairs means the length must be odd and > 1
        if len(dim) == 1 or len(dim) % 2 != 1:
            raise ValueError(_("An invalid set of values and totals was found for a control dimension: %s") % " ".join(dim))
        ctlvars.append(ctlvar)
        try:
            # category totals can be numerical expressions
            # convert to a value after insuring that all numbers are floats
            # SECURITY: eval() runs the total text as Python code, so the
            # command specification must come from a trusted source.
            ctltotals.append({float(cat): float(eval(decimalize(total)))
                              for cat, total in zip(dim[1::2], dim[2::2])})
        except Exception as err:
            raise ValueError(_("""Invalid category or category total for variable: %s""") % vvname) from err

    for i, ds in enumerate(dss):
        catvar = catvars[i]
        totvar = totvars[i]
        if not any([ds, catvar, totvar]):
            continue
        if ds and (catvar is None or totvar is None):
            raise ValueError(_("""A dataset was specified without the category or totals variable names: %s""") % ds)
        try:
            spss.Submit("DATASET ACTIVATE %s" % ds)
            # NOTE(review): the Spssdata object is never closed explicitly;
            # presumably it is released when the temporary is collected - confirm.
            dta = spssdata.Spssdata([catvar, totvar], names=False).fetchall()
            ctlvars.append(catvar)
            # A dataset value might be simply numeric or a string expression
            # SECURITY: eval() runs dataset contents as Python code; only use
            # totals datasets from a trusted source.
            ctltotals.append({float(cat): float(eval(decimalize(total)))
                              for cat, total in dta})
        except BaseException:
            # error conditions include nonexistant dataset and variables and
            # type problems; whatever happened, go back to the original
            # dataset before letting the error propagate
            spss.Submit("DATASET ACTIVATE %s" % activedsname)
            raise
    spss.Submit("DATASET ACTIVATE %s" % activedsname)
    if not ctlvars:
        raise ValueError(_("""No raking specifications were given"""))
    # check for duplicate control variables
    ctllc = [name.lower() for name in ctlvars]
    ctlset = set(ctllc)
    if len(ctllc) != len(ctlset):  # any duplicates?
        # dropping one occurrence of every distinct name leaves only the repeats
        for name in ctlset:
            ctllc.remove(name)
        # sorted so that the message does not depend on set iteration order
        raise ValueError(_("""Duplicate control variables were specified: %s""") % ", ".join(sorted(set(ctllc))))
    return ctlvars, ctltotals
Beispiel #22
0
def Run(args):
    """Entry point for the STATS CORRELATIONS extension command.

    args is the mapping handed over by the caller; only the value stored
    under its first key is used.  When that value contains HELP, helper()
    is called and nothing else happens.  Otherwise the syntax definition,
    the arguments, docorr, and a fresh variable dictionary are passed to
    processcmd."""

    global _

    first_key = list(args.keys())[0]
    args = args[first_key]

    # (keyword, Template options) in declaration order
    template_specs = (
        ("VARIABLES", dict(subc="", ktype="existingvarlist",
                           var="variables", islist=True)),
        ("VARIABLES", dict(subc="WITH", ktype="existingvarlist",
                           var="withvars", islist=True)),
        ("CONFLEVEL", dict(subc="OPTIONS", ktype="float", var="clevel",
                           vallist=(25., 99.999))),
        ("METHOD", dict(subc="OPTIONS", ktype="str", var="method",
                        vallist=("fisher", "bootstrap"))),
        ("LISTWISE", dict(subc="MISSING", ktype="bool", var="listwise")),
        ("PAIRWISE", dict(subc="MISSING", ktype="bool", var="pairwise")),
        ("INCLUDE", dict(subc="MISSING", ktype="bool", var="include")),
        ("EXCLUDE", dict(subc="MISSING", ktype="bool", var="exclude")),
        ("HELP", dict(subc="", ktype="bool")),
    )
    oobj = Syntax([Template(keyword, **options)
                   for keyword, options in template_specs])

    # enable localization: if calling _ fails for any reason, install a
    # module-level pass-through in its place
    try:
        _("---")
    except:

        def _(msg):
            return msg

    # A HELP subcommand overrides all else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, docorr, vardict=spssaux.VariableDict())
Beispiel #23
0
import time
start_time = time.time()
"""
Checks availability of variables in dataset and makes new dataset from selection variable and list of variables.
"""
from vars_and_functions import global_vars as gb
from vars_and_functions import reverse_items
import spss
import spssaux
import pandas as pd
import time

# Output locations derived from the configured project folder.
log = gb.folder + 'out/check_variables_log.txt'
syntax_save = gb.folder + 'out/check_variables.sps'

# Open the source data file and list its variable dictionary.
spssaux.OpenDataFile(gb.org_data)
test = spssaux.VariableDict()
print(test)
# NOTE: this module-level name shadows the builtin vars(); it is kept
# unchanged so the script's global names stay the same.
vars = ['x4_6aevyt01', 'x4_6aevyt02']

for var in reversed(vars):
    i = test[var].index
    dataCursor = spss.Cursor([i])
    try:
        oneVar = dataCursor.fetchall()
    finally:
        # release the cursor even when the read fails
        dataCursor.close()
    #extending the example to get the actual list of values.
    uniqueList = list(set(oneVar))
    # keep the distinct values that are neither None nor listed in
    # gb.missing_int, converted to int
    uniq_vals = [
        int(x[0]) for x in uniqueList
        if x[0] is not None and x[0] not in gb.missing_int
    ]
    uniq_vals_rev = reversed(uniq_vals)
def Run(args):
    """Entry point for the STATS REGRESS PLOT command.

    args is the mapping handed over by the caller; only the value stored
    under its first key is used.  When that value contains HELP, helper()
    is called and nothing else happens.  Otherwise the syntax definition,
    the arguments, plots, and a fresh variable dictionary are passed to
    processcmd."""

    global _

    first_key = list(args.keys())[0]
    args = args[first_key]

    # (keyword, Template options) in declaration order
    template_specs = [
        ("YVARS", {"subc": "", "ktype": "existingvarlist",
                   "var": "yvars", "islist": True}),
        ("XVARS", {"subc": "", "ktype": "existingvarlist",
                   "var": "xvars", "islist": True}),
        ("COLOR", {"subc": "", "ktype": "existingvarlist", "var": "color"}),
        ("SIZE", {"subc": "", "ktype": "existingvarlist", "var": "size"}),
        ("SHAPE", {"subc": "", "ktype": "existingvarlist", "var": "shape"}),
        ("LABEL", {"subc": "", "ktype": "existingvarlist", "var": "label"}),
        ("LINEAR", {"subc": "FITLINES", "ktype": "bool", "var": "linear"}),
        ("QUADRATIC", {"subc": "FITLINES", "ktype": "bool",
                       "var": "quadratic"}),
        ("CUBIC", {"subc": "FITLINES", "ktype": "bool", "var": "cubic"}),
        ("LOESS", {"subc": "FITLINES", "ktype": "bool", "var": "loess"}),
        ("IGNORE", {"subc": "FITLINES", "ktype": "str", "var": "ignore"}),
        ("APPLYTO", {"subc": "FITLINES", "ktype": "str",
                     "var": "applyfitto", "vallist": ["total", "group"]}),
        ("CATEGORICAL", {"subc": "OPTIONS", "ktype": "str",
                         "var": "categorical",
                         "vallist": ["bars", "lines", "boxplot"]}),
        ("GROUP", {"subc": "OPTIONS", "ktype": "int", "var": "group"}),
        ("BOXPLOTS", {"subc": "OPTIONS", "ktype": "bool",
                      "var": "boxplots"}),
        ("HEXBIN", {"subc": "OPTIONS", "ktype": "bool", "var": "hexbin"}),
        ("TITLE", {"subc": "OPTIONS", "ktype": "literal", "var": "title"}),
        ("INDENT", {"subc": "OPTIONS", "ktype": "int", "var": "indent",
                    "vallist": [0, 50]}),
        ("YSCALE", {"subc": "OPTIONS", "ktype": "int", "var": "yscale",
                    "vallist": [50, 100]}),
        ("PAGEX", {"subc": "OPTIONS", "ktype": "float", "var": "pagex",
                   "vallist": [1]}),
        ("PAGEY", {"subc": "OPTIONS", "ktype": "float", "var": "pagey",
                   "vallist": [1]}),
        ("HELP", {"subc": "", "ktype": "bool"}),
    ]
    oobj = Syntax([Template(keyword, **options)
                   for keyword, options in template_specs])

    # ensure localization function is defined: if calling _ fails for any
    # reason, install a module-level pass-through in its place
    try:
        _("---")
    except:

        def _(msg):
            return msg

    # A HELP subcommand overrides all else
    if "HELP" in args:
        helper()
        return
    processcmd(oobj, args, plots, vardict=spssaux.VariableDict())
def genVarMacro(variables, countvalues, order, macroname, mincount,
            minpercent, maxcount, maxpercent, separator, weightvar, missing):
    """Generate a macro listing the variables in order of the weighted counts
    
    variables is the list of candidate variables
    countvalues is a list of the values to be counted
    order is a or d for the variable order in the macro
    macroname is the name of the macro to be generated
    mincount and minpercent specify minimum thresholds for including a variable (<)
    maxcount and maxpercent specific maximum thresholds (>=)
    separator is the variable name separator to use in the macro definition
    weightvar is the name of the weight variable or None
    missing specifies the missing value treatment"""
    
    if weightvar:
        varnamesAndWeight = variables + [weightvar]
    else:
        varnamesAndWeight = variables
    nvar = len(variables)
    if len(separator) == 0:
        separator = " "
    vardict = spssaux.VariableDict(variables)
    types = set(min(v.VariableType, 1) for v in vardict)
    if len(types) > 1:
        raise ValueError(_("""Variable must all be of the same type"""))
    vartypes = types.pop()
    if vartypes == 0:
        try:
            countvalues = [float(v) for v in countvalues]
        except:
            raise ValueError(_("""A non-numeric value to count was specified for a numeric variable"""))
    else:
        countvalues = [v.rstrip() for v in countvalues]
    countvalues = set(countvalues)
    curs = spssdata.Spssdata(indexes=varnamesAndWeight, names=False, 
        convertUserMissing=False, omitmissing=missing == 'exclude')

    counts = {}  # a dictionary of weighted counts with variable names as keys
    # populate counts as all zeros so that all variables will
    # appear in the dictionary for later use
    for v in variables:
        counts[v] = 0    
    w = 1.0
    wsum = 0
    minpercent = minpercent / 100.
    if maxpercent is not None:
        maxpercent = maxpercent / 100.
    # calculate weighted count of counted values for each variable
    # string variables must be trimmed to match counted values list

    for case in curs:
        if weightvar:
            w = case[nvar]
            if w is None:
                w = 0.0
        wsum += w   # accumulate weight
        if vartypes == 1:
            case = [val.rstrip() for val in case[:nvar]] # don't include any weight variable   
        for i in range(nvar):
            if case[i] in countvalues:
                counts[variables[i]] = counts[variables[i]] + w