def getmetadata(variables, missing):
    """Collect labelled values and discrete missing values per variable.

    variables is the list of variable names to look up in the active dataset.
    missing is the missing-value treatment; when it equals "exclude" the
    missing values are removed from the labelled-value sets.

    Returns a pair of dictionaries, both keyed by variable name:
    the set of values that carry a value label, and the set of discrete
    user-missing values.  Range specifications are silently ignored.
    """

    drop_missing = missing == "exclude"
    vldict, missingsdict = {}, {}
    with spss.DataStep():
        ds = spss.Dataset()
        for name in variables:
            labelled = set(ds.varlist[name].valueLabels.data.keys())
            spec = ds.varlist[name].missingValues
            # A negative first element marks a range-style specification;
            # in that case only the last slot is kept.  Otherwise the first
            # element is skipped and the remaining slots are used.
            candidates = [spec[3]] if spec[0] < 0 else spec[1:]
            usermissing = set(code for code in candidates if code is not None)
            missingsdict[name] = usermissing
            vldict[name] = labelled - usermissing if drop_missing else labelled
    return vldict, missingsdict
 def __init__(self):
     """Start a data step and cache the dataset's multiple response sets.

     Sets self.ds to the active dataset, self.varlist to its variable list
     and self.mrsets to a dictionary of MR set definitions keyed by the
     upper-cased set name.  The data step started here is not ended by
     this method.
     """
     spss.StartDataStep()
     self.ds = spss.Dataset()
     self.varlist = self.ds.varlist
     self.mrsets = {}
     # the api always returns the set name in upper case
     # items() rather than iteritems(): the latter does not exist on
     # Python 3 dictionaries, while items() works on both 2 and 3.
     for name, theset in self.ds.multiResponseSet.data.items():
         self.mrsets[name.upper()] = theset
Ejemplo n.º 3
0
 def __init__(self):
     """Start a data step and cache the dataset's multiple response sets.

     If the data step cannot be started on the first attempt, EXECUTE is
     submitted and the start is retried once.  Sets self.ds, self.varlist
     and self.mrsets (MR set definitions keyed by upper-cased set name).
     The data step started here is not ended by this method.
     """
     try:
         spss.StartDataStep()
     except Exception:
         # Narrowed from a bare except so that KeyboardInterrupt and
         # SystemExit are no longer swallowed by the retry.
         spss.Submit("EXECUTE.")
         spss.StartDataStep()
     self.ds = spss.Dataset()
     self.varlist = self.ds.varlist
     # the api always returns the set name in upper case
     self.mrsets = {
         name.upper(): theset
         for name, theset in self.ds.multiResponseSet.data.items()
     }
Ejemplo n.º 4
0
 def addinfo(filespec):
     """Open one data file and append a case per variable to the output dataset.

     filespec is the path of the candidate file.

     All other inputs are free variables taken from the enclosing scope:
     filetypes, ftdict, pat, spsscmd, encoding, dsname, blanks,
     includeAttrs, attrindexes and attrlength.

     The file is processed only if its extension is listed in ftdict for
     one of filetypes and its base name matches pat (when pat is not None).
     If no file type qualifies, the function returns the addinfo function
     object itself without touching any dataset.
     Raises EnvironmentError if the open command fails.
     """

     fn, ext = os.path.splitext(os.path.basename(filespec))
     for ft in filetypes:
         if ext not in ftdict[ft]:
             continue
         if pat is not None and not pat.match(fn):
             continue
         try:
             spss.Submit(spsscmd[ft] % filespec)
             spss.Submit("DATASET NAME @__GATHERMD__.")
         except:
             shown = filespec
             if not isinstance(shown, str):
                 shown = str(shown, encoding)
             raise EnvironmentError(_("File could not be opened, skipping: %s") % shown)
         break
     else:
         # no break: nothing was opened for this file
         return addinfo

     source = filespec.replace("\\","/")
     with DataStep():
         target = spss.Dataset(name=dsname)  # not the active dataset
         opened = spss.Dataset(name="*")  # the dataset to examine
         nvars = spss.GetVariableCount()  # counts the active dataset
         srcvars = opened.varlist
         for index in range(nvars):
             row = [source, spss.GetVariableName(index), spss.GetVariableLabel(index)]
             row.extend(blanks)
             # every cell is extended with 256 blanks before being stored
             target.cases.append([cell + 256*" " for cell in row])
             if not includeAttrs:
                 continue
             attrs = srcvars[index].attributes.data
             for aname in attrs:
                 key = aname.lower()
                 if key in attrindexes:
                     # + 3 skips the three standard columns; only the
                     # first element of the attribute value is recorded
                     target.cases[-1, attrindexes[key] + 3] = attrs[aname][0] + attrlength * " "
     spss.Submit("DATASET CLOSE @__GATHERMD__.")
Ejemplo n.º 5
0
def getvalues(num, denom, id, dsname):
    """Return the value vectors for num, denom and id as a list of three lists.

    Each of num and denom is a sequence holding either numeric constants
    or a single variable name; id is a single item (name, value or None).
    A data step on dsname is started only when isname() reports a name for
    num[0], denom[0] or id.  Vectors of length one are replicated to the
    length of the longest vector.

    Raises ValueError when a name is mixed with other items, when a name is
    not found in the dataset, or when the vector lengths cannot be
    reconciled.  spss.EndDataStep() is always called before returning.
    """

    needdata = isname(num[0]) or isname(denom[0]) or isname(id)
    if needdata:
        spss.StartDataStep()
        ds = spss.Dataset(dsname)
    else:
        ds = None
    id = [id]
    vallist = []
    try:
        if ds:
            # lower-cased names of the variables in the dataset
            vl = [var.name.lower() for var in ds.varlist]
        for spec in (num, denom, id):
            try:
                numbers = [float(item) for item in spec]
            except:  # variable name argument or None
                if spec[0] is None:  # can only happen with id variable
                    vallist.append([None])  # null label in case no id variable
                elif len(spec) > 1:
                    raise ValueError(
                        "Error: Only one variable may be named on each of NUM, DENOM, and ID, and a variable may not be combined with a value: "
                        + " ".join(spec))
                else:
                    try:
                        position = vl.index(spec[0].lower())
                        vallist.append([case[position] for case in ds.cases])
                    except:
                        raise ValueError(
                            "Error: An undefined variable name was specified in NUM, DENOM, or ID: "
                            + " ".join(spec))
            else:
                vallist.append(numbers)
    finally:
        spss.EndDataStep()

    # check and fix value list lengths
    longest = max(len(values) for values in vallist)
    for pos, values in enumerate(vallist):
        if len(values) == 1:
            values = longest * values
            vallist[pos] = values
        if len(values) != longest:
            raise ValueError(
                "Error: NUM, DENOM and optional ID do not all have the same number of items"
            )
    return vallist
Ejemplo n.º 6
0
def genSetsCategoryList(mcset, allvars, resolver, setname, varprefix):
    """Collect the distinct values of a multiple category set and generate from them.

    mcset is the mc set to convert
    allvars is the resolved list of variables in the sets
    resolver is a class that contains the MR set information from the SPSS dictionary.
    setname is the name for the output set
    varprefix is the prefix for variable names to generate

    Returns the tuple (generatednames, generatedvalues, generatedlabels)
    taken from the ManageValues instance after genData() and setgen().
    Raises ValueError if mcset is not of type "Categories" or if no
    non-missing value occurs in any of the set variables.
    """

    if resolver.getSetType(mcset) != "Categories":
        raise ValueError(
            _("""The specified set is not a multiple category set.  Only a set of that type can be used in this procedure: %s"""
              ) % mcset)

    curs = spssdata.Spssdata(
        indexes=allvars, names=False)  # keep cases w missing, mv's set to None
    nvar = len(allvars)

    vvalues = set()
    isstring = None  # resolved once, on the first non-missing value
    try:
        for case in curs:
            for i in range(nvar):
                val = case[i]
                if val is None:  # omit sysmis and user missing values
                    continue
                if isstring is None:
                    isstring = resolver.getVarType(mcset) == "String"
                vvalues.add(val.rstrip() if isstring else val)
    finally:
        # release the cursor even if reading the data fails
        curs.CClose()
    if not vvalues:
        # interpolate after translating so the catalog lookup sees the
        # untranslated template, as in the message above
        raise ValueError(
            _("""There are no values in the set variables for set: %s""") %
            mcset)

    # copy values labels from the first variable in the set
    # MC sets are expected to have consistent value labels across variable
    # if any are defined.
    with spss.DataStep():
        valuelabels = spss.Dataset().varlist[allvars[0]].valueLabels.data

    manager = ManageValues(resolver, mcset, vvalues, setname, varprefix,
                           valuelabels)
    manager.genData()
    manager.setgen()
    return (manager.generatednames, manager.generatedvalues,
            manager.generatedlabels)
Ejemplo n.º 7
0
def metadata(datain, path):
    """Write a metadata description of an SPSS data file to ``path + ".met"``.

    datain is the data file to open; ".sav" is appended when the name does
    not already end with it (case-insensitive).
    path is the output file name without the ".met" extension.

    The output starts with a "variables" section holding one line per
    variable (name, optional label, type/width, optional codeframe).  The
    type/width part is written only when the type differs from that of the
    preceding variable.  If any variable has value labels, a "codeframes"
    section listing them follows.

    Returns 0.
    """
    with open(path + ".met", "w") as f:  # closed even if SPSS raises
        f.write("standard;\n")
        f.write("variables\n")  # write the variable metadata
        # The original used re.match(r"\.sav$", ...), which is anchored at
        # the start of the string and therefore appended ".sav" to names
        # that already had the extension.
        if not datain.lower().endswith(".sav"):
            datain += ".sav"
        spss.Submit("get file='{0}'.".format(datain))
        spss.StartDataStep()
        try:
            ds = spss.Dataset()
            lasttype = -1
            hasframes = False
            for var in ds.varlist:
                line = "  name={0}".format(var.name)  # name
                if var.label:
                    # NOTE(review): double quotes inside the label are written
                    # unescaped; the original replace('"', '"') was a no-op.
                    # Confirm what escaping the .met consumer expects.
                    line += ' label="{0}"'.format(var.label)  # label
                if var.type != lasttype:
                    if var.type == 0:  # type and width
                        line += " type=float width=8"
                    else:
                        line += " type=char width={0}".format(var.type)
                    lasttype = var.type
                if var.valueLabels:  # codeframe
                    line += ' codeframe="{0}"'.format(var.name)
                    hasframes = True
                line += ";\n"
                f.write(line)
            if hasframes:  # write the codeframe metadata
                f.write("codeframes\n")
                for var in ds.varlist:
                    if var.valueLabels:
                        f.write("  name={0}\n".format(var.name))
                        # items() works on Python 2 and 3; iteritems() does not
                        for val, lab in var.valueLabels.data.items():
                            f.write('    {0} = "{1}"\n'.format(val, lab))
                        f.write("  ;\n")
            ds.close()
        finally:
            # never leave the data step open, it blocks later commands
            spss.EndDataStep()
    return 0
def anon(varnames,
         nameroot=None,
         svalueroot='',
         method='sequential',
         seed=None,
         offset=None,
         scale=None,
         maxrvalue=None,
         onetoone=None,
         namemapping=None,
         valuemapping=None,
         mapping=None,
         ignorethis=None):
    """Anonymize the specified variables
    
    varnames is the list of input variables.
    nameroot, if specified, is used as a prefix to rename variables with a numerical suffix.
    svalueroot, if specified, gives a prefix to be prepended to transformed values
    of string variables.
    method = 'sequential' (default), 'random', or 'transform'.
    seed, if specified, is used to initialize the random number generator.
    Any value other than None is used, including 0.
    offset and scale, required if method=transform, are the parameters for a
    linear transform of the values.  If specified for a string variable , sequential is substituted.
    System-missing values are left as sysmis.
    maxrvalue is the maximum value for the random method.  Must be positive.  Only applies
    to random method.
    Can be one value for all variables (a bare number or a one-item list) or a list
    the size of the variable list with variable-specific values
    onetoone is an option list of variable names, a subset of varnames, for which mapped
      values must be unique.  Applies only to method random.  If 1-1 mapping cannot be
      found, an exception is raised.
    namemapping and valuemapping determine whether files with tables of results are saved.
    mapping names a file written as valuemapping to be used to initialize random mappings.
    ignorethis is accepted and not used.

    Value labels and missing value definitions of the processed variables are
    cleared.  Raises ValueError for an inconsistent maxrvalue or onetoone
    specification or when a generated variable name exceeds 64 characters.
    """

    with DataStep():
        ds = spss.Dataset()
        allvariables = ds.varlist
        varnums = [allvariables[v].index for v in varnames]
        numvars = len(varnums)
        if maxrvalue is None:
            maxrvalue = [9999999]
        try:
            len(maxrvalue)
        except TypeError:
            # a bare number, as the documentation above allows
            maxrvalue = [maxrvalue]
        if len(maxrvalue) == 1:
            maxrvalue = numvars * maxrvalue
        if len(maxrvalue) != numvars:
            raise ValueError(
                "The number of values for maxrvalue is different from the number of variables"
            )
        if onetoone is None:
            onetoone = []
        onetoone = set([allvariables[v].index for v in onetoone])
        if not onetoone.issubset(set(varnums)):
            raise ValueError(
                "A variable is listed in ONETOONE that is not in the VARIABLES list"
            )
        if seed is not None:  # a seed of 0 is a legitimate seed
            random.seed(seed)

        trflist = [
            Tvar(allvariables[vn], svalueroot, method, offset, scale,
                 maxrvalue[i], vn in onetoone) for i, vn in enumerate(varnums)
        ]
        mapinputs(trflist,
                  mapping)  #initialize mappings if input mapping given
        todo = list(zip(varnums, trflist))

        for i, case in enumerate(ds.cases):
            for vnum, t in todo:
                ds.cases[i, vnum] = t.trf(case[vnum])

        # remove now irrelevant value labels and missing value codes
        for vn in varnums:
            allvariables[vn].valueLabels = {}
            allvariables[vn].missingValues = (0, None, None, None)

        # rename variables if requested
        # first find a number that guarantees no name conflicts.
        if nameroot:
            basenum = 0
            # escape the root: characters such as "." or "$" in it would
            # otherwise be interpreted as regular expression syntax
            pat = re.compile(r"%s(\d+)$" % re.escape(nameroot), re.IGNORECASE)
            for v in allvariables:
                found = pat.match(v.name)
                if found:
                    basenum = max(basenum, int(found.group(1)))
            basenum += 1
            f = None
            if namemapping:
                f = codecs.open(namemapping, "w", encoding="utf_8_sig")
            try:
                for vn in varnums:
                    newname = nameroot + str(basenum)
                    if len(newname) > 64:
                        raise ValueError(
                            "A replacement variable name is too long: %s" %
                            newname)
                    if f is not None:
                        f.write("%s = %s%s" %
                                (allvariables[vn].name, newname, lineend))
                    allvariables[vn].name = newname
                    basenum += 1
            finally:
                # close the mapping file even when a name is rejected
                if f is not None:
                    f.close()
            if namemapping:
                print("Variable name mappings written to file: %s" %
                      namemapping)
        ds.close()

        # write file of value mappings for each mapped variable in csv format
        if valuemapping:
            # open() instead of file(): the latter exists only in Python 2
            f = open(valuemapping, "w")
            try:
                csvout = UnicodeWriter(f)
                for t in trflist:
                    t.write(csvout)
            finally:
                f.close()
            print("Value mappings written to file: %s" % valuemapping)
Ejemplo n.º 9
0
def PCA(StandardizedPCAInput, varList, regionId):
    """ Use SPSS python api to perform PCA

        Arguments:
        StandardizedPCAInput - 2d python list for PCA input, one row per case
        varList - a list of variable names, one for each column in the PCA input
        regionId - region number; used in the warning text and to trigger
                   a debug dump of the input when it equals 18

        Returns (in this order):
        CorrelationMatrix - Correlation matrix
        NonpositiveDefiniteCorM - True when the KMO and Bartlett table is empty
        KMO - Kaiser-Mayer-Olkin value (0. when the table is empty)
        Bartlett_sig - Significance value of Bartlett's Sphericity Test (0. when the table is empty)
        Communalities - Communalities of extracted components
        VarExplainedInfo - Variance explained from unrotated solution, including absolute variance, % of variance, and cummulative %
        RotatedVarExplainedInfo - Variance explained from rotated solution, including absolute variance, % of variance, and cummulative %
        ComponentMatrix - Unrotated component loading matrix
        RotatedComponentMatrix - Rotated component loading matrix
        ComponentScoreCoefficientMatrix - Component score coefficient derived from rotated solution
        ComponentScore - Component score derived from score coefficient

        Side effects: replaces the active dataset (NEW FILE) and turns SPSS
        output off via spss.SetOutput("off"); it is not turned back on here.
    """
    import os  # local import: only needed for the optional debug dump

    # SPSS command & dataset setup
    spss.Submit("NEW FILE")
    with spss.DataStep():
        datasetObj = spss.Dataset()
        for var in varList:
            datasetObj.varlist.append(var)
        for row in StandardizedPCAInput:
            datasetObj.cases.append(row)
    if regionId == 18:
        # Debugging aid for one region.  The original call passed the
        # arguments to np.savetxt in the wrong order and supplied one value
        # for a two-placeholder format, so it always raised TypeError.
        debugFileOutputDir = r'C:\Users\hxiong\Dropbox\Haoyi Vulnerability\Simulation\Hurricane_Sandy'
        # skip the dump on machines that do not have the debug directory
        if os.path.isdir(debugFileOutputDir):
            np.savetxt(r'%s\PCAInut_r%d' % (debugFileOutputDir, regionId),
                       np.array(StandardizedPCAInput),
                       fmt='%.7f')
    spssPCASyntax = """FACTOR 
    /VARIABLES {0}
    /MISSING LISTWISE 
    /ANALYSIS {0}
    /PRINT UNIVARIATE INITIAL CORRELATION KMO EXTRACTION ROTATION FSCORE 
    /CRITERIA MINEIGEN(1) ITERATE(25) 
    /EXTRACTION PC 
    /CRITERIA ITERATE(100) 
    /ROTATION VARIMAX 
    /SAVE REG(ALL) 
    /METHOD=CORRELATION.""".format(' '.join(varList))
    spss.SetOutput("off")
    varNum = len(varList)
    # Create XML output from SPSS
    tag = spssaux.CreateXMLOutput(spssPCASyntax, omsid='Factor Analysis')

    xpathTemplate = ('//pivotTable//category[@text="%s"]/dimension["Statistics"]'
                     '/category[@text="%s"]/cell/@number')

    def _varianceColumns(section):
        """Return the Total, % of Variance and Cumulative % columns of one section."""
        columns = []
        for statistic in ("Total", "% of Variance", "Cumulative %"):
            cells = spss.EvaluateXPath(tag[0], "/outputTree",
                                       xpathTemplate % (section, statistic))
            columns.append(_spssOutputTableConversion(cells, 1))
        return columns

    # Get correlation matrix
    CorrelationMatrix = spssaux.getValuesFromXmlWorkspace(tag,
                                                          'Correlation Matrix',
                                                          cellAttrib="number")
    CorrelationMatrix = _spssOutputTableConversion(CorrelationMatrix, varNum,
                                                   varNum)
    # Get KMO and Bartlett Plot_test sig.
    KMO_and_Bartlett = spssaux.getValuesFromXmlWorkspace(
        tag, 'KMO and Bartlett Test', cellAttrib="number")
    KMO_and_Bartlett = _spssOutputTableConversion(KMO_and_Bartlett, 1)
    NonpositiveDefiniteCorM = False
    KMO = 0.
    Bartlett_sig = 0.
    if len(KMO_and_Bartlett) == 0:
        NonpositiveDefiniteCorM = True
    else:
        KMO = KMO_and_Bartlett[0]
        Bartlett_sig = KMO_and_Bartlett[3]
    # Get Communalities
    Communalities = spssaux.getValuesFromXmlWorkspace(tag,
                                                      'Communalities',
                                                      colCategory="Extraction",
                                                      cellAttrib="number")
    Communalities = _spssOutputTableConversion(Communalities, 1)
    # Get variances explained in unrotated solution
    VarExplainedInfo = _varianceColumns("Extraction Sums of Squared Loadings")
    VarExplained = VarExplainedInfo[0]
    # Get variances explained in rotated solution
    RotatedVarExplainedInfo = _varianceColumns(
        "Rotation Sums of Squared Loadings")
    RotatedVarExplained = RotatedVarExplainedInfo[0]
    # Get number of extracted components
    if len(VarExplained) != len(RotatedVarExplained):
        w = "Region %d: unrotated and rotated solution finds different number of component based on Kaiser Criterion." % regionId
        warnings.warn(w, RuntimeWarning)
    CompNum = len(VarExplained)
    # the saved scores are read from the columns right after the inputs;
    # range() instead of xrange(), which exists only in Python 2
    ComponentScoreColumnIndex = list(range(varNum, varNum + CompNum))
    # Get component matrix
    ComponentMatrix = spssaux.getValuesFromXmlWorkspace(tag,
                                                        'Factor Matrix',
                                                        cellAttrib="number")
    ComponentMatrix = _spssOutputTableConversion(ComponentMatrix, CompNum,
                                                 varNum)
    # Get rotated component matrix
    RotatedComponentMatrix = spssaux.getValuesFromXmlWorkspace(
        tag, 'Rotated Factor Matrix', cellAttrib="number")
    RotatedComponentMatrix = _spssOutputTableConversion(
        RotatedComponentMatrix, CompNum, varNum)
    # Get component score coefficient matrix
    ComponentScoreCoefficientMatrix = spssaux.getValuesFromXmlWorkspace(
        tag, 'Factor Score Coefficient Matrix', cellAttrib="number")
    ComponentScoreCoefficientMatrix = _spssOutputTableConversion(
        ComponentScoreCoefficientMatrix, CompNum, varNum)
    # Get component score
    dataCursor = spss.Cursor(ComponentScoreColumnIndex)
    try:
        ComponentScore = dataCursor.fetchall()
    finally:
        dataCursor.close()
    return CorrelationMatrix, NonpositiveDefiniteCorM, KMO, Bartlett_sig, Communalities, VarExplainedInfo, RotatedVarExplainedInfo, ComponentMatrix, RotatedComponentMatrix, ComponentScoreCoefficientMatrix, ComponentScore
Ejemplo n.º 10
0
def dopropor(num=None,
             denom=None,
             id=None,
             dsname="*",
             alpha=.05,
             adjust='bonferroni'):
    """Produce a pivot table of proportions with confidence intervals.

    num and denom give the numerators and denominators, as constants or as
    a variable name (resolved by getvalues against dataset dsname).
    id optionally supplies row labels; rows without one are numbered.
    alpha is the significance level used for all intervals.
    adjust is accepted but not referenced in this function.

    For each proportion the table shows binomial and Poisson intervals
    (computed by SPSS in a temporary dataset) and the difference from the
    first proportion with its interval.  Pairs with a missing numerator or
    denominator are discarded.

    Raises ValueError if NUM or DENOM is absent, if a numerator exceeds its
    denominator or a denominator is not positive, or if no valid pair
    remains.
    """

    if num is None or denom is None:
        raise ValueError("Error: NUM and DENOM keywords are required")
    # both branches of the former UTF-8 mode test selected str
    unistr = str

    currentds = spss.ActiveDataset()
    if currentds == "*":
        # give the active dataset a name so it can be reactivated later
        currentds = "S" + str(random.uniform(0, 1))
        spss.Submit("DATASET NAME %s" % currentds)

    numvec, denomvec, idvec = getvalues(num, denom, id, dsname)
    # clean data, discard missing
    keepflags = []
    for n, d in zip(numvec, denomvec):
        valid = n is not None and d is not None
        if valid and (n > d or d <= 0):
            raise ValueError(
                "Error: NUM value greater than DENOM value or zero denominator: %s, %s"
                % (n, d))
        keepflags.append(valid)
    # The original loop rebound its own loop variable, so nothing was ever
    # pruned and a missing value later failed in the division below.
    numvec, denomvec, idvec = [
        [x for keep, x in zip(keepflags, vec) if keep]
        for vec in (numvec, denomvec, idvec)
    ]
    if len(numvec) == 0:
        raise ValueError("Error: No valid proportions were found to analyze")

    alphalow = alpha / 2
    alphahigh = 1 - alphalow
    try:
        spss.StartDataStep()  #TODO: pending transformations
    except:
        spss.Submit("EXECUTE")
        spss.StartDataStep()

    # calculate ci's via SPSS IDFs

    ds = spss.Dataset(name=None)
    spss.SetActive(ds)
    ds.varlist.append("p", 0)
    ds.varlist.append("num", 0)
    ds.varlist.append("denom", 0)

    p0 = numvec[0] / denomvec[0]
    sdvec = []
    for i in range(len(numvec)):
        p1 = numvec[i] / denomvec[i]
        sdvec.append(
            sqrt(p0 * (1 - p0) / denomvec[0] + p1 * (1 - p1) / denomvec[i]))
        ds.cases.append([p1, numvec[i], denomvec[i]])
    spss.EndDataStep()

    cmd = r"""COMPUTE PLOWBI = IDF.BETA(%(alphalow)s, num + .5, denom-num + .5).
    COMPUTE PHIGHBI = IDF.BETA(%(alphahigh)s, num + .5,  denom - num + .5).
    DO IF num > 0.
    COMPUTE PLOWPOIS = (IDF.CHISQ(%(alphalow)s, 2*num)/2)/denom.
    ELSE.
    COMPUTE PLOWPOIS = 0.
    END IF.
    COMPUTE PHIGHPOIS = (IDF.CHISQ(%(alphahigh)s, 2*(num+1))/2) / denom.
    COMPUTE ZTAIL = IDF.NORMAL(%(alphahigh)s, 0,1).
    EXECUTE.""" % {"alphalow": alphalow, "alphahigh": alphahigh}

    spss.Submit(cmd)
    plowbi = []
    phighbi = []
    plowpois = []
    phighpois = []
    spss.StartDataStep()
    ds = spss.Dataset(name="*")
    for case in ds.cases:
        # columns 0-2 are p, num, denom; the computed limits follow
        for column, vec in enumerate((plowbi, phighbi, plowpois, phighpois), 3):
            vec.append(case[column])
        zalpha2 = case[-1]  # ZTAIL is the last column, same in every case
    try:
        closeafter = False
        spss.SetActive(spss.Dataset(name=currentds))
    except:
        closeafter = True
    ds.close()
    spss.EndDataStep()

    from spss import CellText
    spss.StartProcedure("Proportions")
    table = spss.BasePivotTable("Proportion Confidence Intervals",
                                "Proportions")
    titlefootnote = "Alpha = %.3f" % alpha
    if 0. in numvec:
        titlefootnote += " (One-sided %.3f when p = 0)" % (alpha / 2.)
    table.TitleFootnotes(titlefootnote)
    rowdim = table.Append(spss.Dimension.Place.row, "Proportions")
    coldim = table.Append(spss.Dimension.Place.column, "Statistics")
    cols = [
        "p", "Binomial\nLower CI", "Binomial\nUpper CI", "Poisson\nLower CI",
        "Poisson\nUpper CI", "Difference\nfrom p0",
        "Difference from p0\nLower CI", "Difference from p0\nUpper CI"
    ]
    table.SetCategories(coldim, [CellText.String(v) for v in cols])
    labels = []
    for i, v in enumerate(idvec):
        label = unistr(v) if v is not None else ""
        # a missing or empty id falls back to the 1-based row number
        labels.append(label or unistr(i + 1))
    idvec = labels
    table.SetCategories(rowdim, [CellText.String(v) for v in idvec])
    for i in range(len(numvec)):
        p1 = numvec[i] / denomvec[i]
        if i > 0:
            zdifflow = p1 - p0 - sdvec[i] * zalpha2
            zdiffhigh = p1 - p0 + sdvec[i] * zalpha2
        else:
            zdifflow = zdiffhigh = 0.
        table.SetCellsByRow(CellText.String(idvec[i]), [
            CellText.Number(v)
            for v in (p1, plowbi[i], phighbi[i], plowpois[i], phighpois[i],
                      p1 - p0, zdifflow, zdiffhigh)
        ])
        if i == 0:
            # the reference row has no difference from itself
            for col in cols[-3:]:
                table[(CellText.String(idvec[0]),
                       CellText.String(col))] = CellText.String("-")
    spss.EndProcedure()
    if closeafter:
        # Parenthesised: "%" binds tighter than "+", so the original
        # formatted only "S" into the command and appended the random
        # number after the closing period.
        spss.Submit(r"""NEW FILE.
        DATASET NAME %s.""" % ("S" + str(random.uniform(0, 1))))
Ejemplo n.º 11
0
import spss, spssaux
# Print the installed spss module version, open a data file and list its
# variables as "index--->name".
print(spss.__version__)
spss.Submit(
    "get file='C:\\Users\\sam\\Desktop\\Data202201119\\20201119_1047.sav'.")
spss.StartDataStep()
try:
    myDataset = spss.Dataset()
    myVarlist = myDataset.varlist
    print(len(myVarlist))
    print(myVarlist)
    # fetch the name list once instead of once per variable
    varNames = spssaux.GetVariableNamesList()
    for i in range(len(myVarlist)):
        print(str(i) + "--->" + varNames[i])
finally:
    # the original never ended the data step, which blocks later commands
    spss.EndDataStep()
Ejemplo n.º 12
0
def gather(files, filetypes=["spss"], filenamepattern=None, dsname=None,attrlist=[], attrlength=256):
    """Build a dataset with one case per variable found in the selected data files.

    files is a list of files and/or directories.  A file is processed
    directly; a directory is walked and every file in it or below it is
    offered for processing.
    filetypes lists the file types to process: any of "spss", "sas" and
    "stata" (case-insensitive).  It defaults to ["spss"].
    filenamepattern is passed to makeaddinfo together with the other
    settings; None means no name filtering is requested.
    dsname, if given, is assigned as the name of the new dataset.
    attrlist is an optional list of custom attribute names; each one gets a
    string variable of attrlength bytes in the output.

    The new dataset is made the active dataset before returning and its
    name is the return value.  It is not saved by this function.

    Raises ValueError for an unknown file type and, after all items have
    been processed, if any item was not found or a file below a directory
    could not be opened.

    Examples:
    gathermetadata.gather(["c:/temp/firstlevel", "c:/spss16/samples/voter.sav"], ["spss", "sas"])
    gathermetadata.gather(["c:/temp/firstlevel"], filenamepattern="car")
    """

    encoding = locale.getlocale()[1]
    filetypes = [ftype.lower() for ftype in filetypes]
    if any(ftype not in ("spss", "sas", "stata") for ftype in filetypes):
        raise ValueError(_("Filetypes must be one or more of spss, sas, and stata."))
    dsvars = {"source":"source", "variablename":"VariableName", "variablelabel":"variableLabel"}

    standardvars = (
        ("source", 200, _("File containing the variable")),
        ("variableName", 64, _("Variable Name")),
        ("variableLabel", 256, _("Variable Label")),
    )
    with DataStep():
        ds = spss.Dataset(name=None)
        dsn = ds.name
        varlist = ds.varlist
        for vname, width, label in standardvars:
            varlist.append(vname, width)
            varlist[vname].label = label

        # position of each requested attribute among the attribute columns
        attrindexes = {}
        for position, aname in enumerate(attrlist):
            uniquekey = addunique(dsvars, aname)
            varlist.append(dsvars[uniquekey], attrlength)
            attrindexes[aname.lower()] = position

    # factory function: returns the per-file worker
    addvarinfo = makeaddinfo(dsn, filetypes, filenamepattern, dsvars, attrindexes, attrlength)

    files = [fixescapes(name) for name in files]

    # file handle resolution is best effort; the comment in the original
    # notes that this fails if spssaux is prior to version 2.3
    try:
        fh = spssaux.FileHandles()
    except:
        fh = None

    notfound = []
    for item in files:
        if fh is not None:
            try:
                item = fh.resolve(item)
            except:
                pass
        if os.path.isfile(item):
            addvarinfo(item)
            continue
        if os.path.isdir(item):
            for dirpath, dirnames, fnames in os.walk(item):
                for fname in fnames:
                    try:
                        addvarinfo(os.path.join(dirpath, fname))
                    except EnvironmentError as e:
                        notfound.append(e.args[0])
            continue
        if not isinstance(item, str):
            item = str(item, encoding)
        notfound.append(_("Not found: %s") % item)

    spss.Submit("DATASET ACTIVATE %s." % dsn)
    if dsname is not None:
        spss.Submit("DATASET NAME %s." % dsname)
        dsn = dsname
    if notfound:
        raise ValueError("\n".join(notfound))
    return dsn
def genVarsCategoryList(varnames, specialvalues, macroname, missing, order, 
        weightvar, specialsorder, valuelabelsdict, missingvaluesdict,
        customattr, attrname):
    """Generate sorted list(s) of values with possible insertion of extra values
    and return list of SPSS macros to be created.
    
    varnames is a sequence of variable names to process.
    specialvalues is a sequence of values to be placed in a separate section, or None.
    If a special value already occurs in a varname, it will be moved.
    macroname is a list of macronames of the same length as varnames to generate or None.
    Names lacking a leading "!" are updated in place in this list.
    missing is 'include' or 'exclude' to determine whether user missing values are included or excluded.
    order is 'a' or 'd' to specify the sort direction.
    weightvar can be specified as a variable name to be used as a weight in determing the counts to sort by.
    It must not occur in varnames.
    specialsorder is 'after' to put the specials section last; any other value puts it first.
    valuelabelsdict is None or a dictionary mapping each variable name to the set of
    its labelled values; labelled values not found in the data are added with a count of zero.
    missingvaluesdict is accepted and not used in this function.
    customattr indicates whether a custom attribute with the order should be generated
    attrname is the name of the custom attribute
    
    Returns (macrosgenerated, customattrlist): lists of [macro name, value list text]
    and [variable name, value list text] pairs.

    This function is mainly useful as a helper function for Ctables in building CATEGORIES subcommands.
    It may be useful to combine it with OTHERNM and/or MISSING in the category list.
    """

    if weightvar:
        if weightvar in varnames:
            raise ValueError(_("""The weight variable cannot be included as a variable."""))
        varnamesAndWeight = varnames + [weightvar]
    else:
        varnamesAndWeight = varnames
    curs = spssdata.Spssdata(indexes=varnamesAndWeight, names=False, omitmissing=missing =='exclude')
    nvar = len(varnames)
    
    vvalues=[{} for i in range(nvar)]  # for accumulating counts for all variable values
    try:
        for case in curs:
            if weightvar:
                w = case[nvar]
                if w is None:
                    w = 0.0
            else:
                w = 1.0
            for i in range(nvar):
                curval = case[i]
                if curval is not None:   # omit sysmis values and optionally user missing values
                    vvalues[i][curval] = vvalues[i].get(curval,0.) + w   # count occurrences, possibly weighted
    finally:
        # release the cursor even if reading the data fails
        curs.CClose()
    
    # str plus the unicode type on Python 2; just str on Python 3, where
    # the original reference to basestring raised NameError
    stringtypes = (str, type(u""))
    macrosgenerated = []
    customattrlist = []
    for i, vname in enumerate(varnames):
        counts = vvalues[i]
        # if labeled values were supplied but did not occur in the data,
        # add them with a count of zero
        if valuelabelsdict is not None:
            for val in valuelabelsdict[vname] - set(counts):
                counts[val] = 0.
        if specialvalues is not None:  # remove special values from count list
            for v in specialvalues:
                counts.pop(v, None)
        # items() works on Python 2 and 3; iteritems() does not
        ordered = sorted([(count, value) for (value, count) in counts.items()], reverse = order == 'd')
        if specialvalues is not None:
            specials = [(None, v) for v in specialvalues]
            if specialsorder == "after":
                ordered.extend(specials)
            else:
                ordered = specials + ordered

        if macroname is None and not customattr:
            continue
        # quote the values when the first one is a string; an empty list
        # (no values at all for this variable) yields an empty value text
        # instead of the IndexError the original raised
        if ordered and isinstance(ordered[0][1], stringtypes):
            qchar = '"'
        else:
            qchar = ''
        valuetext = " ".join([qchar + strconv(k).rstrip() + qchar  for (count, k) in ordered])
        if macroname is not None:
            if not macroname[i].startswith("!"):
                macroname[i] = "!" + macroname[i]
            macrosgenerated.append([macroname[i], valuetext])
        if customattr:
            customattrlist.append([vname, valuetext])
    
    if customattr:
        try:   # cannot start datastep if there are pending transformations
            spss.StartDataStep()
        except Exception:
            spss.Submit("EXECUTE.")
            spss.StartDataStep()
        try:
            ds = spss.Dataset()
            for name, text in customattrlist:
                ds.varlist[name].attributes[attrname] = text
        finally:
            # never leave the data step open, it blocks later commands
            spss.EndDataStep()
            
        
    return macrosgenerated, customattrlist