Ejemplo n.º 1
0
def mod_database(input_scale, prefix, start, stop):
    """Expand the scale table to one copy per time point and sync it with SPSS.

    For every ``i`` in ``range(start, stop)`` the index of ``input_scale`` is
    reset, a ``time`` column holding ``prefix + str(i)`` is added, and every
    column named in ``gb.columns_to_use`` gets that same tag prepended.  The
    copies are stacked into one frame.  Rows whose ``items`` value exists in
    the active SPSS file are passed to ``recode`` (the result is submitted)
    and, where ``rename`` is not null, renamed with RENAME VARIABLES.

    Returns:
        tuple: ``(db, vars_not_in_file)``.  ``db`` keeps only the rows whose
        ``items`` value is found in the file's variable list as re-read after
        the renames; ``vars_not_in_file`` is a newline-separated text report
        of the remaining ``items`` values.
    """
    vars_in_file = spssaux.VariableDict().Variables
    frames = {}
    for i in range(start, stop):
        tag = prefix + str(i)
        df = input_scale.reset_index()
        df['time'] = tag
        for col in gb.columns_to_use:
            df[col] = tag + df[col]
        frames[i] = df
    db = pd.concat(frames, ignore_index=True)

    db2 = db[db['items'].isin(vars_in_file)]
    spss.Submit(recode(db2))
    # The mask is built from db2 itself so indexer and frame share one index.
    rename_db = db2.loc[db2['rename'].notnull(), ['items', 'rename']]
    for _, row in rename_db.iterrows():
        spss.Submit('RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'.format(
            orgname=row['items'], new_name=row['rename']))

    # Re-read the variable list: the renames above changed it.
    vars_in_file = spssaux.VariableDict().Variables
    in_file = db['items'].isin(vars_in_file)
    diff_vars = db['items'][~in_file].values.tolist()
    vars_not_in_file = 'Variables not in file\n' + '\n'.join(diff_vars) + '\n'
    print(
        str(len(diff_vars)) +
        ' variables not in file. Check log.txt for specification\n')
    db = db[in_file]
    return db, vars_not_in_file
Ejemplo n.º 2
0
def get_median_cut():
    """Compute scale means, read their medians and build 0/1 ``_cut`` variables.

    Uses the module-level ``db_glob``, ``version``, ``prefix``, ``start``,
    ``stop`` and ``folder``.

    1. For every distinct ``version`` value among the dependent rows and every
       time point in ``range(start, stop)`` a COMPUTE ... mean.1(...) command
       is generated over that scale's items and submitted.
    2. FREQUENCIES /STATISTICS=MEDIAN is run on the scale variables of the
       first time point (``start``); the XML output is also written to
       ``folder + '/fest.xml'``.
    3. The medians found are applied to every time point through RECODE ...
       into ``<var>_cut``.

    Returns:
        list: the scale variable names of the first time point.
    """
    # Both comparisons must be parenthesised: ``&`` binds tighter than ``==``,
    # so ``a == 1 & b`` is evaluated as ``a == (1 & b)`` and lets rows with
    # dependent == 0 and a null ``version`` through.
    is_dependent = (db_glob['dependent'] == 1) & db_glob[version].notnull()
    input_dep = db_glob[is_dependent]
    scales = input_dep[version].unique()

    compute_lines = []
    for dep in scales:
        items = input_dep['items'][input_dep[version] == dep]
        for t in range(start, stop):
            list_of_dep_items = [prefix + str(t) + var for var in items]
            compute_lines.append(
                'COMPUTE {var}=mean.1({var_list}).\n'.format(
                    var=prefix + str(t) + dep,
                    var_list=','.join(list_of_dep_items)))
    spss.Submit(''.join(compute_lines) + 'EXECUTE.\n')

    list_of_dep = [prefix + str(start) + dep for dep in scales]
    tag, err = spssaux.CreateXMLOutput(
        'FREQUENCIES VARIABLES={vars}\n'
        '        /STATISTICS=MEDIAN.\n'.format(vars=' '.join(list_of_dep)))
    spss.GetXmlUtf16(tag, folder + '/fest.xml')
    context = "/outputTree"
    xpath = "//category[@text='Median']/dimension[@text='Variables']/category[@variable='true']//@*[name()='number' or name()='varName']"
    raw = spss.EvaluateXPath(tag, context, xpath)
    # The values are already copied into ``raw``; release the workspace item.
    spss.DeleteXPathHandle(tag)

    # ``raw`` is consumed as consecutive (name, median) pairs.  The first four
    # characters of each name are dropped - presumably the prefix/time tag of
    # the first time point; verify against the naming scheme.
    pairs = [(raw[n][4:], raw[n + 1]) for n in range(0, len(raw), 2)]
    recode_lines = []
    for i in range(start, stop):
        for stem, median in pairs:
            recode_lines.append(
                'RECODE {var} (Lowest thru {mean}=0) (sysmis,77,88,99,0=sysmis) (else=1) into {var}_cut.\n'.format(
                    var=prefix + str(i) + stem, mean=median))
    spss.Submit(''.join(recode_lines) + 'EXECUTE.\n')
    return list_of_dep
Ejemplo n.º 3
0
def synthesize(name=None, dir=None, mode=None):
  """Run the srclib "synthesize" sequence: iveset, impute and, optionally, putdata.

  name, dir and mode are forwarded to srclib.arguments, which supplies the
  working path, the input dataset and the output dataset.

  Steps:
    * srclib.metadata is called for the input data;
    * "iveset" is executed and the generated <path>.inp syntax is submitted;
    * "impute" is executed;
    * when an output dataset was given, "putdata" is executed and the
      generated <dataout>.out syntax is submitted.
  Intermediate files are removed unless mode is truthy.

  On failure the <path>.log file (when present) and an error message are
  printed; otherwise the <path>.lst listing is printed, or a message saying
  that it is missing.

  Returns 1 when no arguments could be obtained, the non-zero return code of
  srclib.metadata when that step fails, and None otherwise.

  The print calls take a single argument so the function parses under both
  Python 2 and Python 3.
  """
  proc = "synthesize"
  args = srclib.arguments(name, dir, mode, proc)  # get the arguments
  if not args:
    return 1
  path = args[0]
  datain = args[1]
  dataout = args[2]
  rc = srclib.metadata(datain, path)  # get the metadata
  if rc != 0:
    return rc
  msg = None
  if srclib.execute("iveset", path, mode, None) != 0:  # execute iveset
    msg = "Abnormal termination of iveset"
  elif not os.path.exists(path + ".inp"):  # get the data
    msg = "Missing " + path + ".inp file"
  else:
    with open(path + ".inp", "r") as f:
      cmd = f.read()
    spss.Submit(cmd)
    if not mode:
      os.remove(path + ".inp")
    if srclib.execute("impute", path, mode, None) != 0:  # execute impute
      msg = "Abnormal termination of synthesize"
    elif dataout:  # output the synthesized data
      if srclib.execute("putdata", path, mode, None) != 0:  # execute putdata
        msg = "Abnormal termination of putdata"
      elif not os.path.exists(dataout + ".out"):  # write the synthesized data
        msg = "Missing " + dataout + ".out file"
      else:
        with open(dataout + ".out", "r") as f:
          cmd = f.read()
        spss.Submit(cmd)  # execute the command
        if not mode:
          os.remove(dataout + ".imp")
          os.remove(dataout + ".out")
  if msg:
    if os.path.exists(path + ".log"):  # copy the log
      print("")
      with open(path + ".log", "r") as f:
        print(f.read())
    print(msg)  # print the error message
  elif not os.path.exists(path + ".lst"):
    # This message used to be assigned but never shown.
    print("Missing " + path + ".lst file")
  else:  # copy the listing
    print("")
    with open(path + ".lst", "r") as f:
      print(f.read())
Ejemplo n.º 4
0
 def OpenExcelFile(self, existingfile, ext, sheet, readnames,
                   assumedstrwidth, datasetname):
     """Read one named sheet of a spreadsheet file with GET DATA.

     ext[1:] is used as the /TYPE value, i.e. the first character of ext is
     dropped (presumably the leading dot of a file extension - confirm with
     the caller).  existingfile, sheet, readnames and assumedstrwidth are
     inserted into the matching subcommands.  When datasetname is truthy the
     resulting dataset is given that name.
     """
     get_data_template = (
         'get data /type=%s /file="%s" /sheet=name "%s" /readnames=%s\n'
         '     /assumedstrwidth=%s.')
     filetype = ext[1:]
     values = (filetype, existingfile, sheet, readnames, assumedstrwidth)
     spss.Submit(get_data_template % values)
     if not datasetname:
         return
     spss.Submit("dataset name %s." % datasetname)
Ejemplo n.º 5
0
def dolabels(variables=None, varpattern=None,
    lblvars=None, lblpattern=None, execute=True,
    varsperpass=20, syntax=None):
    """Execute STATS VALLBLS FROMDATA

    variables / varpattern and lblvars / lblpattern are handed to resolve()
    to obtain the variables to label and the (string) label variables.
    execute - when true, the generated label syntax is submitted.
    varsperpass - number of variables handled by each doaggr pass.
    syntax - optional file specification; backslashes are replaced by forward
    slashes and the value is resolved through FileHandles before the generated
    label syntax is written to it with writesyntax.

    Raises ValueError when the spssaux VariableDict cannot be created with
    caseless=True, when either variable list is empty, when more than one
    label variable is given and the counts differ, when a label variable has
    VariableType 0, or when the active dataset is unnamed ("*")."""
    
# debugging
    # makes debug apply only to the current thread
    #try:
        #import wingdbstub
        #if wingdbstub.debugger != None:
            #import time
            #wingdbstub.debugger.StopDebug()
            #time.sleep(1)
            #wingdbstub.debugger.StartDebug()
        #import thread
        #wingdbstub.debugger.SetDebugThreads({thread.get_ident(): 1}, default_policy=0)
        ## for V19 use
        ###    ###SpssClient._heartBeat(False)
    #except:
        #pass
    # Any failure here is reported as "spssaux too old"; the bare except means
    # unrelated errors are reported the same way.
    try:
        vardict = spssaux.VariableDict(caseless=True)
    except:
        raise ValueError(_("""This command requires a  newer version the spssaux module.  \n
It can be obtained from the SPSS Community website (www.ibm.com/developerworks/spssdevcentral)"""))
    
    varstolabel = resolve(vardict, _("variables to label"), variables, varpattern, stringonly=False)
    labelvars = resolve(vardict, _("label variables"), lblvars, lblpattern, stringonly=True)
    if len(varstolabel) == 0 or len(labelvars) == 0:
        raise ValueError(_("""No variables to label or no labelling variables were specified.
If a pattern was used, it may not have matched any variables."""))
    # A single label variable is allowed for any number of variables to label;
    # otherwise the two lists must have the same length.
    if len(labelvars) > 1 and len(labelvars) != len(varstolabel):
        raise ValueError(_("The number of label variables is different from the number of variables to label"))
    # A VariableType of 0 is rejected as "not a string variable".
    if min([vardict[item].VariableType for item in labelvars]) == 0:
        raise ValueError(_("""The label variables must all have type string"""))
    dsname = spss.ActiveDataset()
    if dsname == "*":
        raise ValueError(_("""The active dataset must have a dataset name in order to use this procedure"""))
    if syntax:
        syntax = syntax.replace("\\", "/")
        syntax = FileHandles().resolve(syntax)
        
    mkvl = Mkvls(varstolabel, labelvars, varsperpass, execute, syntax, vardict)
    
    # Process the variables in batches of varsperpass, activating the original
    # dataset before each pass.
    for i in range(0, len(varstolabel), varsperpass):
        spss.Submit("""DATASET ACTIVATE %s""" % dsname)
        mkvl.doaggr(i)
    spss.Submit("""DATASET ACTIVATE %s""" % dsname)    
    labelsyntax = mkvl.dolabels()
    if labelsyntax and execute:
        spss.Submit(labelsyntax)
    mkvl.report(labelsyntax)
    if labelsyntax and syntax:
        writesyntax(labelsyntax, syntax, mkvl)
Ejemplo n.º 6
0
def main():
    """Open the data file, build the derived variables and save the result.

    Relies on the module-level names ``data``, ``db_glob``, ``prefix``,
    ``start``, ``stop`` and ``save_data``.  Output is switched off, the
    syntax produced by ``recode_cut`` and ``make_long_exp_vars`` is
    submitted, and the active file is saved compressed to ``save_data``.
    """
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')

    db_prefixed = mod_database(db_glob, prefix, start, stop)

    cut_syntax = recode_cut(db_prefixed)
    spss.Submit(cut_syntax)

    long_syntax = make_long_exp_vars()
    spss.Submit(long_syntax)

    save_syntax = "SAVE OUTFILE='%s'\n/COMPRESSED." % save_data
    spss.Submit(save_syntax)
Ejemplo n.º 7
0
def bbdesign(name=None, dir=None, mode=None):
  """Run the srclib "bbdesign" procedure in two passes (setup, then go).

  name, dir and mode are forwarded to srclib.arguments, which supplies the
  working path and the input dataset.

  Steps:
    * srclib.metadata is called for the input data;
    * "bbdesign" is executed with the "/setup" argument and the generated
      <path>.inp syntax is submitted;
    * "bbdesign" is executed again without extra arguments; when a
      <path>.out file exists its syntax is submitted.
  Intermediate files are removed unless mode is truthy.

  On failure the <path>.log file (when present) and an error message are
  printed; otherwise the <path>.lst listing is printed, or a message saying
  that it is missing.

  Returns 1 when no arguments could be obtained, the non-zero return code of
  srclib.metadata when that step fails, and None otherwise.

  The print calls take a single argument so the function parses under both
  Python 2 and Python 3.
  """
  proc = "bbdesign"
  args = srclib.arguments(name, dir, mode, proc)  # get the arguments
  if not args:
    return 1
  path = args[0]
  datain = args[1]
  rc = srclib.metadata(datain, path)  # get the metadata
  if rc != 0:
    return rc
  msg = None
  if srclib.execute("bbdesign", path, mode, ["/setup"]) != 0:  # setup pass
    msg = "Abnormal termination of bbdesign"
  elif not os.path.exists(path + ".inp"):  # get the data
    msg = "Missing " + path + ".inp file"
  else:
    with open(path + ".inp", "r") as f:
      cmd = f.read()
    spss.Submit(cmd)
    if not mode:
      os.remove(path + ".inp")
    if srclib.execute("bbdesign", path, mode, None) != 0:  # go pass
      msg = "Abnormal termination of bbdesign"
    elif os.path.exists(path + ".out"):  # write the samples
      with open(path + ".out", "r") as f:
        cmd = f.read()
      spss.Submit(cmd)  # execute the command
      if not mode:
        os.remove(path + ".out")
  if msg:
    if os.path.exists(path + ".log"):  # copy the log
      print("")
      with open(path + ".log", "r") as f:
        print(f.read())
    print(msg)  # print the error message
  elif not os.path.exists(path + ".lst"):
    # This message used to be assigned but never shown.
    print("Missing " + path + ".lst file")
  else:  # copy the listing
    print("")
    with open(path + ".lst", "r") as f:
      print(f.read())
Ejemplo n.º 8
0
    def doaggr(self, doindex):
        """create an aggregate dataset and tally values

        doindex is the index into varstolabel at which to start

        One batch of at most self.varsperpass variables is aggregated with the
        batch variables as break variables and the MIN and MAX of each label
        variable as outputs.  The aggregate cases are then scanned to fill
        self.vlabels, self.values, self.labelusage, self.conflicts and
        self.duplabels.  The aggregate dataset is closed afterwards."""
        
        vtl = self.varstolabel[doindex:doindex+self.varsperpass]
        vtllen = len(vtl)
        # lastlbl is the highest case index that holds a label aggregate.
        # With a single label variable there is only one min/max pair, located
        # right after the vtllen break values.
        if len(self.labelvars) == 1:
            lbls = self.labelvars
            lastlbl = vtllen + 1
        else:
            lbls = self.labelvars[doindex:doindex+self.varsperpass]
            lastlbl = 3 * vtllen - 1
        brkvarlist = "\n".join(textwrap.wrap(" ".join(vtl), width=100))
        # The aggregate output variables get random names; they are only read
        # back by position.
        outvars = ["/min_%s=MIN(%s)/max_%s=MAX(%s)" % (mkrandomname(), v, mkrandomname(), v) for v in lbls]
        aggrcmd = Mkvls.aggrtemplate % (self.aggrdsname, self.aggrdsname, brkvarlist) + "\n".join(outvars)
        spss.Submit(aggrcmd)
        spss.Submit("DATASET ACTIVATE %s" % self.aggrdsname)
        
        # for each variable, build label information based on data
        # AGGREGATE dataset structure:
        # var1value, var2value,..., min(text lbl1), max(text lbl1), min(text lbl2), max(text lbl2)...
        # but if only one label set, only one pair of label aggregates is produced
        # user missing values are exposed and subject to labelling
        
        curs = spssdata.Spssdata(names=False, convertUserMissing=False)
        for case in curs:
            for v, vname in enumerate(vtl):
                value = case[v]
                # The min() clamps the index so that, with a single label
                # variable, every v reads the same min/max pair.
                minlbl = self.truncate(case[min(vtllen + v*2, lastlbl-1)], 120).rstrip()
                maxlbl = self.truncate(case[min(vtllen + v*2 + 1, lastlbl)], 120).rstrip()
                # more than one label for the same value?
                if minlbl != maxlbl and (minlbl != "" and minlbl is not None):
                    self.conflicts[vname].add(value)
                # ignore empty or missing labels
                if maxlbl != "" and maxlbl is not None:
                    # if the value has already been seen but with a different label, it's a conflict
                    if value in self.values[vname] and not (value, maxlbl) in self.vlabels[vname]:
                        self.conflicts[vname].add(value)
                    else:
                        self.vlabels[vname].add((value, maxlbl))  # first one wins
                        self.values[vname].add(value)
                        # tally instances where the same label used for different value
                        # need to see whether labels has been assigned to a different value
                        previousvalue =  self.labelusage[vname].get(maxlbl, None)
                        if previousvalue is not None and value != previousvalue:
                            ###self.duplabels[vname] = self.duplabels[vname] + 1
                            self.duplabels[vname].add(maxlbl)
                        self.labelusage[vname][maxlbl] = value

        # The cursor is closed explicitly before the next Submit call.
        curs.CClose()
        spss.Submit("DATASET CLOSE %s" % self.aggrdsname)
Ejemplo n.º 9
0
def runscript(scriptname, params=None):
    """Construct a parameter dictionary and run a Python script.

    scriptname is the path to run.
    params is a Python dictionary of parameter names and values; omitting it
    is the same as passing an empty dictionary.

    The total size of the parameter dictionary is limited to 4K (after pickling).

    This function returns a dictionary of values set by the script via setreturnvalue.
    If the script sets no return value, the result is an empty dictionary.

    The parameters travel through the file "__SCRIPT__" and the return value
    through "__SCRIPTRETURN__", both in the system temporary directory.
    NOTE: the return file is read with pickle.loads, so whatever can write to
    that directory can supply the pickled data; do not use this where the
    temporary directory is shared with untrusted users."""

    if params is None:
        params = {}

    fnparams = tempfile.gettempdir() + os.sep + "__SCRIPT__"
    fnreturn = tempfile.gettempdir() + os.sep + "__SCRIPTRETURN__"
    f = open(fnparams, "w+")
    try:
        # ensure file size is 4096 for *nix os's.
        f.write(1024 * "0000")
        f.flush()
        shmem = mmap.mmap(f.fileno(), 4096, access=mmap.ACCESS_WRITE)
    finally:
        # the mapping stays valid after the file object is closed
        f.close()

    try:
        shmem.write(pickle.dumps(params))
        try:
            os.remove(fnreturn)  # ensure that no stale returns file exists
        except OSError:
            pass
        spss.Submit("SCRIPT " + spssaux._smartquote(scriptname))
    finally:
        # released even when the pickle is too large or the script fails
        shmem.close()

    # The _SYNC command is required in order to ensure that the script has completed
    spss.Submit("_SYNC")

    # The parameter file will be removed by the script if it calls getscriptparam, but
    # the following code will clean up in case the script doesn't make that call.
    try:
        os.remove(fnparams)
    except OSError:
        pass

    # get the return value, if any.  A missing or unreadable return file is
    # deliberately treated as "no return value".
    try:
        with open(fnreturn, "r") as f:
            shmem = mmap.mmap(f.fileno(), 4096, access=mmap.ACCESS_READ)
            try:
                ret = pickle.loads(shmem.read(4096))
            finally:
                shmem.close()
        os.remove(fnreturn)
    except Exception:
        ret = {}
    return ret
Ejemplo n.º 10
0
    def getsav(self, filespec, delete=True):
        """Read a sav file and return all of its cases.

        filespec is the path of the file to open.
        delete - when true (the default) filespec is removed after reading.

        The file is opened under the dataset name stored in self.wdsname; that
        dataset is closed again and NEW FILE is issued before returning."""

        names = {"filespec": filespec, "item": self.wdsname}
        open_syntax = ('get file="%(filespec)s".\n'
                       'DATASET NAME %(item)s.\n'
                       'DATASET ACTIVATE %(item)s.')
        close_syntax = ('DATASET CLOSE %(item)s.\n'
                        '        NEW FILE.')

        spss.Submit(open_syntax % names)
        # The cursor object is intentionally not bound to a name, so it is
        # dropped as soon as the cases have been fetched.
        contents = spssdata.Spssdata(names=False).fetchall()
        spss.Submit(close_syntax % names)
        if delete:
            os.remove(filespec)
        return contents
Ejemplo n.º 11
0
    def genData(self):
        """Create one dichotomy variable for each distinct value of the set.

        The values in self.vvalues are processed in sorted order.  For the
        k-th value (counting from 1) a variable named
        "<self.varprefix>_<k, two digits>" is computed as ANY(value, set
        variables), labelled with the value label from self.valuelabels (or
        the value itself when there is none) and set to nominal level.  The
        names, values and labels are appended to self.generatednames,
        self.generatedvalues and self.generatedlabels, and all commands are
        submitted in one call."""

        varprefix = self.varprefix
        # The VALUE function only works for numeric variables, so it is
        # applied for numeric sets only.  In ANY, all string values are
        # considered valid.
        setvars = ",".join(
            self.setvars if self.string
            else ["VALUE(%s)" % name for name in self.setvars])

        template = ('COMPUTE %(vname)s = any(%(val)s, %(setvars)s).\n'
                    'VARIABLE LABEL %(vname)s %(vallabel)s.\n'
                    'VARIABLE LEVEL %(vname)s (NOMINAL).')

        # if any generated variables already exist, they will be overwritten.
        # if they exist and are strings, the procedure will fail.
        computes = []
        for v1, val in enumerate(sorted(self.vvalues), 1):
            vname = "%(varprefix)s_%(v1)02d" % {"varprefix": varprefix, "v1": v1}
            self.generatednames.append(vname)
            vallabel = self.valuelabels.get(val, val)  # value label if any
            self.generatedvalues.append(val)
            self.generatedlabels.append(vallabel)
            if self.string:
                val = spssaux._smartquote("%s" % val)
            computes.append(template % {
                "vname": vname,
                "val": val,
                "setvars": setvars,
                "vallabel": vallabel,
            })
        spss.Submit(computes)
Ejemplo n.º 12
0
def list_of_cut_old(indep):
    """Return the values listed in the FREQUENCIES text output for ``indep``.

    ``sys.stdout`` is temporarily redirected to ``descript.txt`` (in the
    current working directory) while FREQUENCIES is submitted, and that file
    is then parsed.  Parsing starts at the line holding the third occurrence
    of ``'Valid'`` and stops at the first following line containing
    ``'Total'``; lines containing ``'__'`` are skipped.  Every remaining line
    must split into exactly eight ``'|'``-separated fields, otherwise
    ``ValueError`` is raised.  The third field is returned with commas turned
    into periods and blanks removed.

    Returns:
        list of str: the collected values, in output order.
    """
    saved_stdout = sys.stdout
    capture = open('descript.txt', 'w')
    sys.stdout = capture
    try:
        spss.Submit('FREQUENCIES VARIABLES=%s\n'
                    '    /STATISTICS=MINIMUM MAXIMUM' % indep)
    finally:
        # Always give stdout back, even when the command fails.
        sys.stdout = saved_stdout
        capture.close()

    cut_vals = []
    valid_flag = 0
    with open('descript.txt', 'r') as indata:
        for row in indata:
            if 'Valid' in row:
                valid_flag += 1
            if valid_flag == 3 and '__' not in row:
                if 'Total' in row:
                    break
                _, _, value, _, _, _, _, _ = row.split('|')
                cut_vals.append(value.replace(',', '.').replace(' ', ''))
    return cut_vals
Ejemplo n.º 13
0
def rename_vars(vars_in_file):
    """Rename variables according to the module-level ``rename`` table.

    For each row of ``rename`` whose ``old`` value is contained in
    ``vars_in_file`` a RENAME VARIABLES old=new command followed by EXECUTE
    is submitted; all other rows are skipped.
    """
    template = 'RENAME VARIABLES {orgname}={new_name}.\nEXECUTE.'
    for _, entry in rename.iterrows():
        old_name, new_name = entry['old'], entry['new']
        if old_name not in vars_in_file:
            continue
        spss.Submit(template.format(orgname=old_name, new_name=new_name))
Ejemplo n.º 14
0
def dohistogram(finalweightvar):
    """Submit the histogram syntax built from the histtem template.

    finalweightvar is the generated weight variable.  The template receives
    exactly two substitution values: finalweightvar and the translated chart
    title."""

    substitutions = {
        "finalweightvar": finalweightvar,
        "title": _("Raked Weights Histogram before Applying New Weights"),
    }
    spss.Submit(histtem % substitutions)
def doactions(filespec=None, conflict="noname", currentactivedsn=None):
    """Execute command

    currentactivedsn - when given, the active dataset is named with it and
    the name is also stored in the data file attribute named by customdsattr.
    filespec - when given, the file is opened and the name stored in its
    customdsattr attribute (if any) is looked up.
    conflict - "override" assigns the stored name even though another open
    dataset already uses it; any other value leaves the new dataset unnamed
    in that situation.

    Raises ValueError when neither filespec nor currentactivedsn is given, or
    when currentactivedsn is already used by a dataset other than the active
    one."""

    # debugging
    # makes debug apply only to the current thread
    # Best effort only: the debugger stub is normally not installed.
    try:
        import wingdbstub
        if wingdbstub.debugger is not None:
            import time
            wingdbstub.debugger.StopDebug()
            time.sleep(1)
            wingdbstub.debugger.StartDebug()
        import _thread
        wingdbstub.debugger.SetDebugThreads({_thread.get_ident(): 1}, default_policy=0)
        # for V19 use
        ##    ###SpssClient._heartBeat(False)
    except Exception:
        pass
    if filespec is None and currentactivedsn is None:
        raise ValueError(_("No actions were specified for this command"))
    activeds = spss.ActiveDataset().lower()
    alldatasets = getAllDatasetNames()

    if currentactivedsn is not None:
        wanted = currentactivedsn.lower()
        if wanted != activeds and wanted in alldatasets:
            raise ValueError(_("""The dataset name to be assigned is already in use for another dataset: %s""")\
                % currentactivedsn)
        # The template was previously submitted without substitution, so SPSS
        # received the literal %(...)s placeholders.
        spss.Submit(
            "DATASET NAME %(currentactivedsn)s.\n"
            "            DATAFILE ATTRIBUTE ATTRIBUTE=%(customdsattr)s(%(currentactivedsn)s)."
            % {"currentactivedsn": currentactivedsn,
               "customdsattr": customdsattr})

    if filespec is not None:
        # The unnamed active dataset might be empty, but we preserve it in case it isn't
        if activeds == "*":
            # random has no ranunif function, and the number has to be turned
            # into text before it can be appended to the "D" prefix.
            spss.Submit("""DATASET NAME %s.""" % ("D" + str(random.uniform(.1, 1.))))
        spss.Submit("""GET FILE="%s". """ % filespec)
        thedsn = spss.GetDataFileAttributes(customdsattr)
        if len(thedsn) == 0:
            print(_("The data file does not contain a permanent dataset name.  No session dataset name has been assigned."))
        else:
            # NOTE(review): no DATASET NAME is issued when the stored name is
            # NOT already in use; that looks like a missing branch - confirm
            # against the intended behaviour before relying on it.
            if thedsn[0].lower() in alldatasets:
                if conflict != "override":
                    print(_("The permanent dataset name is already in use in this session. No session dataset name has been assigned."))
                else:
                    print(_("The dataset name has been removed from an already open dataset: %s") % thedsn[0])
                    spss.Submit("""DATASET NAME %s.""" % thedsn[0])
Ejemplo n.º 16
0
def list_of_cut(indep):
    """Return the sorted distinct category numbers of ``indep``.

    FREQUENCIES is run for ``indep`` with XML output captured, and the
    ``number`` attribute of every category inside the "Valid" group of the
    Frequencies pivot table is collected.  The XPath handle is deleted before
    returning.  The values are sorted as returned by ``spss.EvaluateXPath``
    (no numeric conversion is applied here).
    """
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    frequencies_cmd = 'FREQUENCIES VARIABLES={}.'.format(indep)
    tag, err = spssaux.CreateXMLOutput(frequencies_cmd)
    numbers = spss.EvaluateXPath(
        tag,
        '/outputTree',
        '//pivotTable[@subType="Frequencies"]//group[@text="Valid"]//category/@number')
    spss.DeleteXPathHandle(tag)
    distinct = list(set(numbers))
    distinct.sort()
    return distinct
 def __enter__(self):
     """initialization for with statement

     Starts a data step.  If the first attempt fails, EXECUTE is submitted
     and the data step is started once more; a second failure propagates."""
     try:
         spss.StartDataStep()
     except Exception:
         # Presumably the failure is caused by pending transformations, which
         # EXECUTE flushes.  Exception (rather than a bare except) keeps
         # KeyboardInterrupt and SystemExit from being swallowed here.
         spss.Submit("EXECUTE")
         spss.StartDataStep()
     return self
Ejemplo n.º 18
0
def doheatmap(variables, yvar, xvar, paneldownvar, panelacrossvar, finalweightvar, autoheatmap):
    """produce a heatmap of the weights with 2 to 4 variables if requested
    
    variables is the set of variables with control totals
    yvar and xvar are the main variables for the heatmap (as lists)
    paneldownvar and panelacrossvar are variables for paneling down and across
    finalweightvar is the output weight variable
    autoheatmap specifies that the first two to four control variables
    define the heatmap dimensions

    Nothing is produced (and None is returned) when there is only one control
    variable, when no plot variable is given, when x or y is missing, or when
    a plot variable is not one of the control variables; the last two cases
    print a message."""
    
    # autoheatmap is used to get around dialog box limitations and overrides other
    # related variable specifications without a warning
    
    if len(variables) == 1:
        return    # No heatmap available
    if autoheatmap:
        yvar = [variables[0]]
        xvar = [variables[1]]
        # Number of dimensions to use: capped by the autoheatmap value, by
        # the number of control variables and by 4.
        hmsize = min(autoheatmap, len(variables), 4)
        if hmsize >=3:
            paneldownvar = [variables[2]]
        else:
            paneldownvar = None
        if hmsize >=4:
            panelacrossvar = [variables[3]]
        else:
            panelacrossvar = None
    
    plotvars = (yvar, xvar, paneldownvar, panelacrossvar)
    
    if not any(plotvars):
        return    # no plot requested
    if not all([yvar, xvar]):
        print(_("No heatmap produced: both y and x variables must be specified"))
        return
    # Only the first element of each plot variable list is checked.
    if set(v[0] for v in plotvars if v is not None) - set(variables):
        print(_("No heatmap produced: only raking variables can be specified"))
        return
    # The names assigned below are consumed by hmtemplate through locals(),
    # so they must keep these exact spellings.
    # NOTE(review): "nomimal" in the two LEVEL strings looks like a typo for
    # "nominal"; it is runtime text, so confirm how SPSS parses it before
    # changing it.
    if not paneldownvar:
        panelingdownvars = ""
        panelingdowntem = ""
    else:
        panelingdownvar = paneldownvar[0]
        panelingdownvars = """%(panelingdownvar)s[LEVEL=nomimal]""" % locals()
        panelingdowntem = """ "Panel down"="%(panelingdownvar)s"[DATASET="graphdataset"]""" % locals()
    if not panelacrossvar:
        panelingacrossvars = ""
        panelingacrosstem = ""
    else:
        panelingacrossvar = panelacrossvar[0]
        panelingacrossvars = """%(panelingacrossvar)s[LEVEL=nomimal]""" % locals()
        panelingacrosstem = """ "Panel across"="%(panelingacrossvar)s"[DATASET="graphdataset"]""" % locals()
    title = _("Unweighted Heatmap of Weights by Raking Variables")
    label = _("Weight Heatmap")
    # From here on yvar and xvar are plain names instead of one-element lists.
    yvar = yvar[0]
    xvar = xvar[0]
    cmd = hmtemplate % locals()
    spss.Submit(cmd)
Ejemplo n.º 19
0
def doblock(blocknum, atstart, atend, errorprint, errorcont, errorcall,
            syntax):
    """Execute block of syntax
    
    blocknum is the condition number
    atstart and atend are text to be displayed before and after
    errorprint is what to display on a syntax error
    errorcont is whether to continue running lines or stop
    errorcall is an optional function called on a syntax error; returning
    "stop" or "continue" overrides errorcont
    syntax is a list of syntax lines to execute.

    In atstart, atend and errorprint the text ")BLOCK" is replaced by the
    one-based block number.  Raises ValueError when syntax is empty."""

    if not atstart is None:
        print(atstart.replace(")BLOCK", str(blocknum + 1)))
    lastline = len(syntax) - 1
    if lastline < 0:
        raise ValueError(_("""A syntax command block contains no syntax"""))

    # Submit each command one by one and handle error conditions
    cmd = []
    inmatrix = False
    for linenum, line in enumerate(syntax):
        cmd.append(line)
        # block or pseudo-block commands have to be submitted in a single call
        testline = line.rstrip().lower()
        if testline in ["matrix", "matrix."]:
            inmatrix = True
        # Outside MATRIX, submit at the last line or at a line ending in a
        # period, unless the next line starts a GPL block (which is then kept
        # together with the current command).
        dosubmit = not inmatrix and (linenum == lastline or (testline.endswith(".")\
            and (syntax[linenum+1].lower().strip() not in ["begin gpl", "begin gpl."])))
        if testline == "end matrix.":
            inmatrix = False
            dosubmit = True
        if dosubmit:
            try:
                spss.Submit(cmd)
                cmd = []
            except:
                if not errorprint is None:
                    print(errorprint.replace(")BLOCK", str(blocknum + 1)))
                if not errorcall is None:
                    # an error function can take control on error.
                    # It can return "stop" or "continue" to override the action specified in STATS IF
                    action = errorcall(blocknum + 1, cmd,
                                       spss.GetLastErrorLevel(),
                                       spss.GetLastErrorMessage())
                    if action == "stop":
                        break
                    elif action == "continue":
                        cmd = []
                        continue
                # any other return value falls through to the errorcont setting
                if not errorcont:
                    break
                cmd = []

    if not atend is None:
        print(atend.replace(")BLOCK", str(blocknum + 1)))
Ejemplo n.º 20
0
def main():
    """Run the imputation workflow and save the imputed data set.

    Works inside the ``imputations`` directory.  Each piece of generated
    syntax is logged to the ``imputation_syntax`` file; the setup and the
    imputation syntax are also submitted.  Afterwards the imputed files are
    merged by ``match_files`` and the active file is saved compressed to
    ``gb.imputed_data``.
    """
    os.chdir(imputations)
    spssaux.OpenDataFile(gb.select_rev)
    setup_parts = [
        'DATASET NAME orginal.\n',
        'cd "{cwd}".\n'.format(cwd=imputations),
    ]
    spss.SetOutput("off")
    setup_parts.append(fix_value_lables())
    setup_cmd = ''.join(setup_parts)
    with open(imputation_syntax, 'w') as log:
        log.write(setup_cmd)
    spss.Submit(setup_cmd)

    impute_cmd = impute_item_for_item()
    with open(imputation_syntax, 'a') as log:
        log.write('\n' + impute_cmd)
    spss.Submit(impute_cmd)

    match_files()

    # NOTE(review): the recode syntax is only written to the log file here,
    # it is not submitted - confirm that this is intended.
    recode_cmd = recode()
    with open(imputation_syntax, 'a') as log:
        log.write('\n' + recode_cmd)

    save_template = "SAVE OUTFILE = '{imputed_data}'\n    /COMPRESSED.\n"
    spss.Submit(save_template.format(imputed_data=gb.imputed_data))
Ejemplo n.º 21
0
def get_hostname(lines):
    """Extract the quoted host name from a '[Contacting ... host "name" ...]' line.

    ``lines`` is joined with newlines and searched for the first such
    bracketed message; the whole message must be on one line.  When the host
    is "no-net" (compared case-insensitively) a warning is issued and
    "show license." is submitted before the name is returned.

    Returns:
        str: the host name exactly as written in the message.

    Raises:
        ValueError: when no matching message is found.
    """
    # Raw string: "\[" inside an ordinary string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    m = re.search(r'\[Contacting .* host "(.*)".*\]', "\n".join(lines))
    if m is None:
        raise ValueError("Hostname not found")
    hostname = m.group(1)
    if hostname.lower() == "no-net":
        msg = "This program is intended for concurrent licenses only"
        issue_warning(msg)
        spss.Submit("show license.")
    return hostname
Ejemplo n.º 22
0
    def setgen(self):
        """Define the multiple dichotomy set for the generated variables.

        The set is named self.setname, labelled with the set label that
        self.mrsetinfo reports for self.mcset, built from
        self.generatednames with counted value 1, and displayed."""

        fields = {
            "outputname": self.setname,
            "label": self.mrsetinfo.getSetLabel(self.mcset),
            "variables": " ".join(self.generatednames),
        }
        template = ('MRSETS /MDGROUP NAME=%(outputname)s LABEL="%(label)s" \n'
                    'VARIABLES = %(variables)s VALUE=1\n'
                    '/DISPLAY NAME=[%(outputname)s]')
        spss.Submit(template % fields)
Ejemplo n.º 23
0
 def __init__(self):
     """Start a data step and cache the dataset, its variables and MR sets.

     If the data step cannot be started, EXECUTE is submitted and the start
     is attempted once more.  self.mrsets maps each multiple response set
     name, converted to upper case, to its definition."""
     try:
         spss.StartDataStep()
     except Exception:
         # Exception (rather than a bare except) keeps KeyboardInterrupt and
         # SystemExit from being swallowed by the retry.
         spss.Submit("EXECUTE.")
         spss.StartDataStep()
     self.ds = spss.Dataset()
     self.varlist = self.ds.varlist
     # the api always returns the set name in upper case
     self.mrsets = {
         name.upper(): theset
         for name, theset in self.ds.multiResponseSet.data.items()
     }
Ejemplo n.º 24
0
 def addinfo(filespec):
     """open the file if appropriate type, extract variable information, and add it to dataset dsname.
     
     filespec is the file to open

     The following names are not parameters; they are taken from the
     enclosing scope:
     dsname is the dataset name to append to
     filetypes is the list of file types to include.
     ftdict, pat, spsscmd, encoding, blanks, includeAttrs, attrindexes and
     attrlength are read from the enclosing scope as well.

     Raises EnvironmentError when the file cannot be opened."""
     
     fnsplit = os.path.split(filespec)[1]
     fn, ext = os.path.splitext(fnsplit)
     # Open the file with the command of the first file type whose extension
     # list contains ext (and whose name matches pat, when a pattern is set).
     for ft in filetypes:
         if ext in ftdict[ft]:
             if pat is None or pat.match(fn):
                 try:
                     spss.Submit(spsscmd[ft] % filespec)
                     spss.Submit("DATASET NAME @__GATHERMD__.")
                 except:
                     if not isinstance(filespec, str):
                         filespec = str(filespec, encoding)
                     raise EnvironmentError(_("File could not be opened, skipping: %s") % filespec)
                 break
     else:
         # No file type matched, so nothing was opened.
         # NOTE(review): this returns the addinfo function object itself,
         # which looks unintentional - confirm what callers expect.
         return addinfo
     
     with DataStep():
         ds = spss.Dataset(name=dsname)  # not the active dataset
         dssource = spss.Dataset(name="*")  # The dataset to examine
         numvars = spss.GetVariableCount() # active dataset
         variables = dssource.varlist
         for v in range(numvars):
             lis = [filespec.replace("\\","/"), spss.GetVariableName(v), spss.GetVariableLabel(v)]
             lis.extend(blanks)
             # every value is padded with 256 blanks before it is stored
             lis = [item+ 256*" " for item in lis]
             ds.cases.append(lis)
             #ds.cases.append([filespec.replace("\\","/"), spss.GetVariableName(v), spss.GetVariableLabel(v), *blanks])
             if includeAttrs:
                 attrs = variables[v].attributes.data
                 for a in attrs:
                     if a.lower() in attrindexes:
                         # only the first value of the attribute is stored
                         ds.cases[-1, attrindexes[a.lower()]+ 3] = attrs[a][0] +  attrlength * " "# allow for standard variables
     spss.Submit("DATASET CLOSE @__GATHERMD__.")
Ejemplo n.º 25
0
 def execute(self, commands):
     """Submit all commands to SPSS in a single batch.

     A '* Encoding: UTF-8.' comment line is placed first.  Every non-empty
     command that does not already end with a period gets one appended;
     empty commands are passed through unchanged.
     """
     # Execute all commands as batch; this allows to execute
     # BEGIN DUMMY. [...] END DUMMY. as well,
     # i.e. BEGIN MATRIX. [...] END MATRIX.
     batch = ['* Encoding: UTF-8.']
     for command in commands:
         needs_period = len(command) >= 1 and command[-1] != '.'
         # SPSS probably only understands ASCII
         batch.append(command + '.' if needs_period else command)
     spss.Submit(batch)
Ejemplo n.º 26
0
def add_suffix(folder):
    """Append the file name as a suffix to the variables of every sav file.

    For each sav file found in ``folder + "/no_strings"`` and
    ``folder + "/strings"`` the file is opened, every variable except the id
    variable is renamed to ``<name>_<file name without extension>``, and the
    result is saved under the same file name in the ``suffix`` subfolder of
    the folder it came from.  All datasets are closed afterwards.
    """
    exclude = ['kod_id']  # name of the id variable, which is not renamed
    for item in ["/no_strings", "/strings"]:
        datafiles = get_filelist(folder + item, 'sav')
        for datafile in datafiles:
            print(datafile)
            spssaux.OpenDataFile(datafile)
            # splitext removes just the extension; str.strip('.sav') removes
            # any of the characters '.', 's', 'a', 'v' from both ends and
            # turned e.g. "data.sav" into "dat".
            basename = os.path.splitext(os.path.basename(datafile))[0]
            suffix = basename  # desired suffix
            print(basename)
            varnames = spssaux.VariableDict().variables
            for i in exclude:
                if i in varnames:
                    varnames.remove(i)
            oldnames = spssaux.VariableDict().expand(varnames)
            newnames = [varnam + "_" + suffix for varnam in oldnames]
            spss.Submit('rename variables (%s=%s).' %
                        ('\n'.join(oldnames), '\n'.join(newnames)))
            spss.Submit("""
            SAVE OUTFILE = "%s%s".
            DATASET CLOSE ALL.
            NEW FILE.
            """ % (folder + item + '/suffix/', basename + '.sav'))
Ejemplo n.º 27
0
def match_files():
    """Merge all sav files of the imputations folder into COMPLETE.sav.

    The merge runs in two stages.  First the files are matched in groups of
    at most 30 (by ``Imputation_`` and ``gb.id``) and every group is saved as
    ``part_<n>.sav``.  Then the folder is listed again and every file whose
    path contains ``part_`` is matched into ``COMPLETE.sav``.  Both pieces of
    syntax are appended to the ``imputation_syntax`` file before they are
    submitted, and all datasets are closed at the end.
    """
    chunk_size = 30
    all_files = funcs.get_filelist(imputations, 'sav')
    chunks = [all_files[i:i + chunk_size]
              for i in range(0, len(all_files), chunk_size)]
    # enumerate supplies the part number directly; looking each chunk up with
    # list.index is quadratic and returns the wrong number for equal chunks.
    cmd1 = '\n'.join([
        'MATCH FILES\n' + '\n'.join([' /FILE="%s"' % fil for fil in chunk]) +
        '\n /BY Imputation_ %s.\n' % gb.id +
        'SAVE OUTFILE="part_%s.sav"\n/COMPRESSED.\n' % part_no
        for part_no, chunk in enumerate(chunks)
    ])
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + cmd1)
    spss.Submit(cmd1)

    part_files = [x for x in funcs.get_filelist(imputations, 'sav')
                  if 'part_' in x]
    cmd2 = (
        'MATCH FILES\n' +
        '\n'.join([' /FILE="%s"' % fil for fil in part_files]) +
        '\n /BY Imputation_ %s.\n' % gb.id +
        'SAVE OUTFILE="COMPLETE.sav"\n/COMPRESSED.\n'
    )
    cmd2 += 'DATASET CLOSE ALL.\n'
    with open(imputation_syntax, 'a') as out:
        out.write('\n' + cmd2)
    spss.Submit(cmd2)
Ejemplo n.º 28
0
 def createHandle(self, handle, spec, encoding=None):
     """Create a file handle and update the handle list accordingly
     
     handle is the name of the handle
     spec is the location specification, i.e., the /NAME value
     encoding optionally specifies the encoding according to the valid values in the FILE HANDLE syntax.

     Every slash or backslash in spec is replaced by the platform path
     separator.  The handle is recorded in self.fhdict under its lower-case
     name as a (spec, encoding) tuple."""
     
     # clean up path separator.  A callable replacement inserts os.path.sep
     # literally; re.escape() escapes for patterns, not for replacement
     # templates, and could leave a stray backslash in the result on Python
     # versions where it escapes "/".
     spec = re.sub(r"[\\/]", lambda match: os.path.sep, spec)
     cmd = """FILE HANDLE %(handle)s /NAME="%(spec)s" """ % {
         "handle": handle, "spec": spec}
     # Note the use of double quotes around the encoding name as there are some encodings that
     # contain a single quote in the name
     if encoding:
         cmd += ' /ENCODING="' + encoding + '"'
     spss.Submit(cmd)
     self.fhdict[handle.lower()] = (spec, encoding)
def search(name=None, dir=None, mode=None):
    """Run the srclib "search" sequence: srchset, then search.

    name, dir and mode are forwarded to srclib.arguments, which supplies the
    working path, the input dataset and the output dataset.

    Steps:
      * srclib.metadata is called for the input data;
      * "srchset" is executed and the generated <path>.inp syntax is
        submitted (the file is removed unless mode is truthy);
      * "search" is executed;
      * when an output dataset was given, srclib.residuals is called.

    On failure the <path>.log file (when present) and an error message are
    printed; otherwise the <path>.lst listing is printed, or a message saying
    that it is missing.

    Returns 1 when no arguments could be obtained, the non-zero return code
    of srclib.metadata when that step fails, and None otherwise.

    The print calls take a single argument so the function parses under both
    Python 2 and Python 3.
    """
    proc = "search"
    args = srclib.arguments(name, dir, mode, proc)  # get the arguments
    if not args:
        return 1
    path = args[0]
    datain = args[1]
    dataout = args[2]
    rc = srclib.metadata(datain, path)  # get the metadata
    if rc != 0:
        return rc
    msg = None
    if srclib.execute("srchset", path, mode, None) != 0:  # execute srchset
        msg = "Abnormal termination of srchset"
    elif not os.path.exists(path + ".inp"):  # get the data
        msg = "Missing " + path + ".inp file"
    else:
        with open(path + ".inp", "r") as f:
            cmd = f.read()
        spss.Submit(cmd)
        if not mode:
            os.remove(path + ".inp")
        if srclib.execute("search", path, mode, None) != 0:  # execute search
            msg = "Abnormal termination of search"
        elif dataout:
            srclib.residuals(datain, dataout, path,
                             mode)  # output the residuals
    if msg:
        if os.path.exists(path + ".log"):  # copy the log
            print("")
            with open(path + ".log", "r") as f:
                print(f.read())
        print(msg)  # print the error message
    elif not os.path.exists(path + ".lst"):
        # This message used to be assigned but never shown.
        print("Missing " + path + ".lst file")
    else:  # copy the listing
        print("")
        with open(path + ".lst", "r") as f:
            print(f.read())
Ejemplo n.º 30
0
def calculate_odds(db_prefixed,start,stop,prefix):
    """Run log_reg for every dependent/independent/cutoff combination.

    For each time point in range(start, stop), each column type in
    columns_to_use and each distinct non-NaN independent variable of that
    type, the cutoffs of the variable are obtained with list_of_cut.  For
    every dependent variable and every cutoff but the last, the variable is
    recoded with recode_cut and log_reg is called.

    A running counter n grows by 2 for every combination; combinations with
    n below the module-level error_search are skipped.

    Returns a tuple (db, db_err): the concatenated log_reg results and the
    concatenated log_reg error frames.
    """
    cmd_recode = ''
    cmd_log_reg = ''
    db_list = []
    db_err_list = []
    #input_indep = db_glob.query('dependent != 1')
    input_dep = db_glob.query('dependent == 1')
    dep_list = get_median_cut()
    n = 0
    for time in range(start,stop):
        # Replace the first four characters of each dependent name by the tag
        # of the current time point, then drop duplicates (order is not kept).
        dep_list_time = [prefix+str(time)+item[4:] for item in dep_list]
        myset = set(dep_list_time)
        dep_list_time = list(myset)
        input_indep = db_prefixed.query('dependent != 1 and time == "%s%s"' % (prefix,str(time)))
        for typ in columns_to_use:
            # NOTE(review): suffix is only assigned for these two column
            # types; any other type reuses the previous value or raises
            # NameError on first use - confirm columns_to_use.
            if typ == version:
                suffix = '_middle'
            if typ == single_items:
                suffix = '_single'
            for indep in input_indep[typ].unique():
                if str(indep) != 'nan':
                    indep = indep + suffix
                    cutoffs = list_of_cut(indep)
                    for dep in dep_list_time:
                        for x in range(len(cutoffs) - 1):
                            if n >= error_search:

                                #cmd_log_reg += recode_cut(indep,cutoffs,x)
                                #cmd_log_reg += log_reg_cmd(dep,indep)

                                spss.Submit(recode_cut(indep,cutoffs, x))
                                db, db_err = log_reg(dep,indep,cutoffs[x+1],n)
                                db_list.append(db)
                                db_err_list.append(db_err)

                                # spss.Submit(odds(dep, indep))
                                #db_list.append(log_reg(dep,indep,cutoffs[x+1],n))
                            n += 2
                        #db_list.append(find_value(indep,dep,prefix+str(time)))

    db = pd.concat(db_list)
    db_err = pd.concat(db_err_list)
    #print(cmd_log_reg)
    return db, db_err