def main():
    """Write the run log and the SPSS syntax for the variable check.

    Steps, in order:

    1. Append the input/output data paths to ``gb.to_log`` and write it to
       the ``log`` file (overwriting any previous log).
    2. Open ``gb.org_data`` in SPSS and switch SPSS output off.
    3. Read ``gb.input_csv`` and keep the rows where at least one of the
       columns named by ``gb.version`` / ``gb.single_items``, ``'rename'``
       or ``'recode'`` is non-null.
    4. Pass the selection through ``mod_database`` and ``prepare_data`` and
       write the resulting syntax to ``syntax_save``.
    5. Log how many items were extracted, then call ``how_often`` and
       ``reverse_items.main``.
    6. Append the ``extract_vars`` syntax to ``syntax_save`` and the second
       log text returned by ``mod_database`` to ``log``.

    The generated syntax is only saved to disk; it is not submitted to SPSS
    from this function. Finally the elapsed time since ``start_time`` is
    printed.
    """
    gb.to_log += "data in = {data_in}\ndata out = {data_out}\n\n".format(
        data_in=gb.org_data, data_out=gb.select_rev)
    with open(log, 'w') as out:
        out.write(gb.to_log)

    spssaux.OpenDataFile(gb.org_data)
    spss.SetOutput("off")

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with these
    # arguments is the documented drop-in replacement.
    db = pd.read_csv(gb.input_csv, index_col=0, parse_dates=True)
    selected = db.loc[db[gb.version].notnull() | db[gb.single_items].notnull()
                      | db['rename'].notnull() | db['recode'].notnull()]
    item_db, to_log2 = mod_database(selected, gb.prefix, gb.start, gb.stop)

    cmd = prepare_data(item_db)
    with open(syntax_save, 'w') as out:
        out.write(cmd)

    list_of_all_vars = item_db['items'].values.tolist()
    # NOTE(review): `.Variables` is taken without being called - confirm that
    # how_often expects this object.
    vars_in_file = spssaux.VariableDict().Variables
    to_log1 = str(len(list_of_all_vars)) + ' variables extracted.\n'
    print(to_log1)
    with open(log, 'a') as out:
        out.write(to_log1)

    how_often(item_db, vars_in_file, gb.prefix, gb.start, gb.stop)
    reverse_items.main(item_db, log, syntax_save)

    cmd = extract_vars(list_of_all_vars, gb.urvalsinfo, gb.select_rev,
                       gb.keep_vars)
    with open(syntax_save, 'a') as out:
        out.write(cmd + '\n')

    with open(log, 'a') as out:
        out.write(to_log2)
    print("--- %s seconds ---" % (time.time() - start_time))
Exemple #2
0
def main():
    """Open the data file, run the generated recode syntax and save the result.

    Opens ``data`` in SPSS with output switched off, submits the syntax
    produced by ``recode_cut`` (for the prefixed database) and by
    ``make_long_exp_vars``, then saves the active data set to ``save_data``.
    """
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')

    prefixed = mod_database(db_glob, prefix, start, stop)

    recode_syntax = recode_cut(prefixed)
    spss.Submit(recode_syntax)

    long_vars_syntax = make_long_exp_vars()
    spss.Submit(long_vars_syntax)

    save_syntax = "SAVE OUTFILE='%s'\n/COMPRESSED." % save_data
    spss.Submit(save_syntax)
Exemple #3
0
def recode():
    """Build the RECODE syntax for every item that has a ``recode2`` rule.

    Opens ``COMPLETE.SAV`` from the ``imputations`` folder, then emits one
    ``RECODE`` statement per (item, index) pair, where the index runs over
    ``range(gb.start, gb.stop)`` and the variable name is
    ``gb.prefix + index + item``. The syntax is printed and returned; it is
    not submitted here.

    Returns:
        The RECODE statements joined by newlines, followed by ``EXECUTE.``.
    """
    spssaux.OpenDataFile(imputations + '/COMPLETE.SAV')

    rows = db_glob.loc[db_glob['recode2'].notnull()]
    statements = []
    for item, rule in rows[['items', 'recode2']].values.tolist():
        for number in range(gb.start, gb.stop):
            statements.append('RECODE {prefix}{var} {recode}.'.format(
                var=item, recode=rule, prefix=gb.prefix + str(number)))

    cmd = '\n'.join(statements) + '\nEXECUTE.\n'
    print(cmd)
    return cmd
Exemple #4
0
def main():
    """Dichotomize the independent variables, run the analysis and export odds.

    Opens ``data``, splits the file by ``Imputation_``, submits the syntax
    from ``dicotomize`` and ``make_analysis_exposure`` for all rows whose
    ``dependent`` column is not 1, saves an intermediate binary data set,
    writes the two frames returned by ``calculate_odds`` to CSV and finally
    saves the active data set to ``save_data``.
    """
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    setup_commands = (
        'SET TNumbers=Values ONumbers=Labels OVars=Labels.',
        'SORT CASES  BY Imputation_.\nSPLIT FILE LAYERED BY Imputation_.',
    )
    for command in setup_commands:
        spss.Submit(command)

    prefixed = mod_database(db_glob, prefix, start, stop)
    get_median_cut()

    independents = prefixed.query('dependent != 1')
    dichotomize_syntax = dicotomize(independents)
    spss.Submit(dichotomize_syntax)
    analysis_syntax = make_analysis_exposure(independents)
    spss.Submit(analysis_syntax)
    spss.Submit("SAVE OUTFILE='COMPLETE_RECODED_BINARY.sav'\n/COMPRESSED.")

    odds, odds_errors = calculate_odds(prefixed, start, stop, prefix)
    odds_path = '{version}_{cutmod}.csv'.format(version=version,
                                                cutmod=cut_mod)
    odds.to_csv(odds_path)
    errors_path = 'error_{version}_{cutmod}.csv'.format(version=version,
                                                        cutmod=cut_mod)
    odds_errors.to_csv(errors_path)

    spss.Submit("SAVE OUTFILE='%s'\n/COMPRESSED." % save_data)
Exemple #5
0
def main():
    """Run the imputation pipeline and save the imputed data set.

    Works inside the ``imputations`` folder: opens ``gb.select_rev``, submits
    the setup syntax (dataset name, working directory, value-label fixes) and
    the syntax from ``impute_item_for_item``, calls ``match_files`` and then
    saves the active data set to ``gb.imputed_data``. Every piece of syntax
    is also written to ``imputation_syntax``.

    NOTE(review): the syntax returned by ``recode()`` is appended to the
    syntax file but is not submitted in this function - confirm that this is
    intended.
    """
    os.chdir(imputations)
    spssaux.OpenDataFile(gb.select_rev)

    header = 'DATASET NAME orginal.\n' + 'cd "{cwd}".\n'.format(
        cwd=imputations)
    spss.SetOutput("off")
    setup_syntax = header + fix_value_lables()
    with open(imputation_syntax, 'w') as syntax_file:
        syntax_file.write(setup_syntax)
    spss.Submit(setup_syntax)

    impute_syntax = impute_item_for_item()
    with open(imputation_syntax, 'a') as syntax_file:
        syntax_file.write('\n' + impute_syntax)
    spss.Submit(impute_syntax)

    match_files()

    recode_syntax = recode()
    with open(imputation_syntax, 'a') as syntax_file:
        syntax_file.write('\n' + recode_syntax)

    save_syntax = """SAVE OUTFILE = '{imputed_data}'
    /COMPRESSED.\n""".format(imputed_data=gb.imputed_data)
    spss.Submit(save_syntax)
Exemple #6
0
def add_suffix(folder):
    """Suffix every variable in each data file with the file's base name.

    For each ``.sav`` file found by ``get_filelist`` in ``folder/no_strings``
    and ``folder/strings``, all variables except the id variable are renamed
    to ``<name>_<file base name>`` and the data set is saved under the same
    file name in the corresponding ``suffix`` sub-folder.

    Args:
        folder: Directory that contains the ``no_strings`` and ``strings``
            sub-folders.
    """
    exclude = ['kod_id']  # name of the id variable; it keeps its name
    for item in ["/no_strings", "/strings"]:
        datafiles = get_filelist(folder + item, 'sav')
        for path in datafiles:
            print(path)
            spssaux.OpenDataFile(path)
            # splitext removes only the extension. The previous
            # str.strip('.sav') stripped any leading/trailing '.', 's', 'a'
            # or 'v' characters and so mangled names such as "vas.sav".
            basename = os.path.splitext(os.path.basename(path))[0]
            suffix = basename  # desired suffix
            print(basename)
            names = [name for name in spssaux.VariableDict().variables
                     if name not in exclude]
            oldnames = spssaux.VariableDict().expand(names)
            newnames = [varnam + "_" + suffix for varnam in oldnames]
            # An empty RENAME list is invalid syntax, so skip the command
            # when the file holds nothing but excluded variables.
            if oldnames:
                spss.Submit('rename variables (%s=%s).' %
                            ('\n'.join(oldnames), '\n'.join(newnames)))
            spss.Submit("""
            SAVE OUTFILE = "%s%s".
            DATASET CLOSE ALL.
            NEW FILE.
            """ % (folder + item + '/suffix/', basename + '.sav'))
Exemple #7
0
def fix_files(folder, datevars, id_variable, date):
    """Normalize variable types in every ``.sav`` file of ``folder``.

    For each file returned by ``get_filelist``:

    * variables are renamed through ``rename_vars`` and the id variable is
      converted to ``f8``;
    * if the file has an ``AnswerDate`` variable, ``time_days`` is computed
      as the day difference between ``AnswerDate`` and ``DATE.MDY(date)``;
    * every variable whose name does not contain ``'mean'`` and that is
      neither a string variable nor listed in ``datevars`` is converted to
      ``f8``, right-aligned and set to scale level; except for the id
      variable, system-missing values are recoded to 999 and 999 is declared
      missing;
    * cases are sorted by the id variable, ``data_from_week`` is set from
      the file's base name, and the syntax from ``save_and_close`` is
      submitted.

    Args:
        folder: Directory that is searched for ``.sav`` files.
        datevars: Names of date variables that must not be converted.
        id_variable: Name of the case id variable.
        date: Text inserted into ``DATE.MDY(...)``.

    NOTE(review): ``ALTER TYPE time_days`` is submitted even when the file
    has no ``AnswerDate`` variable - confirm that ``time_days`` always exists.
    """
    datafiles = get_filelist(folder, "sav")
    for fil in datafiles:
        spssaux.OpenDataFile(fil)
        vars_in_file = spssaux.VariableDict().variables
        rename_vars(vars_in_file)
        spss.Submit("ALTER TYPE %s (f8)." % id_variable)
        stringvars = get_stringvars()
        print(stringvars)
        # Re-read the names because rename_vars may have changed them.
        vars_in_file = spssaux.VariableDict().variables
        # splitext copes with file names that contain extra dots (or none);
        # unpacking basename.split('.') raised ValueError on those.
        base = os.path.splitext(os.path.basename(fil))[0]
        exclude = [var for var in stringvars if var in vars_in_file]
        if "AnswerDate" in vars_in_file:
            spss.Submit("""
            COMPUTE time_days = DATEDIFF(AnswerDate, DATE.MDY(%s), "day").
            EXECUTE.
            """ % date)
        # Built once per file instead of concatenating lists per variable.
        skip = set(exclude).union(datevars)
        for var in vars_in_file:
            if 'mean' in var or var in skip:
                continue
            spss.Submit("ALTER TYPE %s (f8)." % var)
            spss.Submit("VARIABLE ALIGNMENT %s(right)." % var)
            spss.Submit("VARIABLE LEVEL %s(scale)." % var)
            if var != id_variable:
                spss.Submit("RECODE %s (SYSMIS=999)." % var)
                spss.Submit("MISSING VALUES %s (999)." % var)
        spss.Submit("SORT CASES BY %s (A)." % id_variable)
        spss.Submit("""COMPUTE data_from_week=%s.
        EXECUTE.
        ALTER TYPE data_from_week (f8).
        ALTER TYPE time_days (f8).
        """ % base)
        spss.Submit(save_and_close(base, exclude, folder))
    spss.Submit("NEW FILE.")
Exemple #8
0
def main():
    """Open ``data`` and export the ``calculate_odds`` result as CSV.

    The table is written to ``long_vars_ut.csv`` in the current working
    directory.
    """
    spssaux.OpenDataFile(data)
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    calculate_odds(start, stop, prefix).to_csv('long_vars_ut.csv')
Exemple #9
0
import time
start_time = time.time()
"""
Checks availability of variables in dataset and makes new dataset from selection variable and list of variables.
"""
from vars_and_functions import global_vars as gb
from vars_and_functions import reverse_items
import spss
import spssaux
import pandas as pd
import time

# Output locations for the log and the generated syntax.
log = gb.folder + 'out/check_variables_log.txt'
syntax_save = gb.folder + 'out/check_variables.sps'
spssaux.OpenDataFile(gb.org_data)
test = spssaux.VariableDict()
print(test)
vars = ['x4_6aevyt01', 'x4_6aevyt02']

# For each variable (last one first), collect the distinct non-missing values
# found in the data and keep them as integers.
for var in reversed(vars):
    i = test[var].index
    dataCursor = spss.Cursor([i])
    try:
        oneVar = dataCursor.fetchall()
    finally:
        # Only one data cursor may be open at a time, so release this one
        # before the next iteration opens another.
        dataCursor.close()
    # Each fetched row is a one-element tuple; de-duplicate the rows.
    uniqueList = list(set(oneVar))
    uniq_vals = [
        int(x[0]) for x in uniqueList
        if x[0] is not None and x[0] not in gb.missing_int
    ]
    uniq_vals_rev = reversed(uniq_vals)
import SpssClient
import spssaux
import spss
import csv
import re

# Connect to the SPSS client before any data is touched.
SpssClient.StartClient()

# User-defined settings: placeholders to be replaced with real paths.
# Only input_file is used in the lines below.
input_file = r'user_input_1'
output_file = r'user_input_2'
output_var_vals_file = r'user_input_3'
output_var_info_file = r'user_input_4'

# Load the .sav file as the active data set.
spssaux.OpenDataFile(input_file)

# Report the number of variables (columns) and cases (rows).
varCount = spss.GetVariableCount()
caseCount = spss.GetCaseCount()
print('There are %d variables in this file' % varCount)
print('There are %d cases. Please check this matches number of cases in the output file' % spss.GetCaseCount())

# Clean the file: only string variables are modified.
for ind in range(varCount):
    varName = spss.GetVariableName(ind)
    # A variable type greater than 0 is treated as a string variable here.
    if spss.GetVariableType(ind) > 0:
        print("String Variable: %s" % varName)
        # Replace each character with code 0-32 by a space, one COMPUTE per
        # code (presumably STRING(n,PIB) yields the character with code n -
        # TODO confirm).
        for ascii_ind in range(33):
            spss.Submit('compute %s = REPLACE(%s, STRING(%d,PIB), " ")' % (varName, varName, ascii_ind))
        # Run the pending transformations for this variable.
        spss.Submit('EXECUTE.')