def main():
    """Write the variable-check log and the extraction syntax file.

    Appends the input/output data paths to ``gb.to_log`` and writes it to
    ``log``, opens ``gb.org_data`` in SPSS, keeps the rows of
    ``gb.input_csv`` that have a value in at least one of the
    ``gb.version``, ``gb.single_items``, ``'rename'`` or ``'recode'``
    columns, and writes the syntax returned by ``prepare_data`` and
    ``extract_vars`` to ``syntax_save``.  The extraction syntax is only
    written to file; it is not submitted to SPSS here.
    """
    gb.to_log += """data in = {data_in} data out = {data_out}\n\n""".format(
        data_in=gb.org_data, data_out=gb.select_rev)
    with open(log, 'w') as out:
        out.write(gb.to_log)

    spssaux.OpenDataFile(gb.org_data)
    spss.SetOutput("off")

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with these two
    # arguments is the documented replacement and also works on older pandas.
    db = pd.read_csv(gb.input_csv, index_col=0, parse_dates=True)
    has_instruction = (
        db[gb.version].notnull()
        | db[gb.single_items].notnull()
        | db['rename'].notnull()
        | db['recode'].notnull()
    )
    db2 = db.loc[has_instruction]

    # Named `selected` rather than `vars` so the builtin is not shadowed.
    selected, to_log2 = mod_database(db2, gb.prefix, gb.start, gb.stop)

    cmd = prepare_data(selected)
    with open(syntax_save, 'w') as out:
        out.write(cmd)

    list_of_all_vars = selected['items'].values.tolist()
    # NOTE(review): the capitalised `Variables` attribute is kept exactly as
    # written; confirm it is the intended spssaux.VariableDict member.
    vars_in_file = spssaux.VariableDict().Variables

    to_log1 = str(len(list_of_all_vars)) + ' variables extracted.\n'
    print(to_log1)
    with open(log, 'a') as out:
        out.write(to_log1)

    how_often(selected, vars_in_file, gb.prefix, gb.start, gb.stop)
    reverse_items.main(selected, log, syntax_save)

    cmd = extract_vars(list_of_all_vars, gb.urvalsinfo, gb.select_rev,
                       gb.keep_vars)
    with open(syntax_save, 'a') as out:
        out.write(cmd + '\n')

    # The messages returned by mod_database are logged last.
    with open(log, 'a') as out:
        out.write(to_log2)

    print("--- %s seconds ---" % (time.time() - start_time))
def main():
    """Open ``data``, run the recode and long-variable syntax, then save.

    The syntax strings come from ``recode_cut`` and ``make_long_exp_vars``;
    each one is generated and submitted before the next is generated.  The
    result is saved compressed to ``save_data``.
    """
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')

    prefixed = mod_database(db_glob, prefix, start, stop)

    recode_syntax = recode_cut(prefixed)
    spss.Submit(recode_syntax)

    long_vars_syntax = make_long_exp_vars()
    spss.Submit(long_vars_syntax)

    save_syntax = "SAVE OUTFILE='%s'\n/COMPRESSED." % save_data
    spss.Submit(save_syntax)
def recode():
    """Build, print and return RECODE syntax for the imputed variables.

    Opens ``COMPLETE.SAV`` in the ``imputations`` folder, then emits one
    ``RECODE`` statement per row of ``db_glob`` that has a ``recode2``
    value and per index in ``range(gb.start, gb.stop)``.  The variable name
    is ``gb.prefix`` + index + the row's ``items`` value.  The statements
    are followed by a single ``EXECUTE.``.

    Returns:
        The generated syntax as one string (nothing is submitted here).
    """
    spssaux.OpenDataFile(imputations + '/COMPLETE.SAV')

    rows = db_glob.loc[db_glob['recode2'].notnull()]

    statements = []
    for item, rule in rows[['items', 'recode2']].values.tolist():
        for i in range(gb.start, gb.stop):
            statements.append(
                'RECODE {prefix}{var} {recode}.'.format(
                    var=item, recode=rule, prefix=gb.prefix + str(i)))

    cmd = '\n'.join(statements) + '\nEXECUTE.\n'
    print(cmd)
    return cmd
def main():
    """Dichotomise the independent variables, analyse them and export odds.

    Opens ``data``, splits the file by ``Imputation_``, submits the syntax
    produced by ``dicotomize`` and ``make_analysis_exposure`` for the rows
    where ``dependent != 1``, saves an intermediate file, writes the two
    frames returned by ``calculate_odds`` to CSV and finally saves the
    dataset to ``save_data``.
    """
    spssaux.OpenDataFile(data)
    spss.SetOutput("off")
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')
    spss.Submit('SORT CASES BY Imputation_.\nSPLIT FILE LAYERED BY Imputation_.')

    prefixed = mod_database(db_glob, prefix, start, stop)
    get_median_cut()

    independents = prefixed.query('dependent != 1')

    dichotomise_syntax = dicotomize(independents)
    spss.Submit(dichotomise_syntax)

    analysis_syntax = make_analysis_exposure(independents)
    spss.Submit(analysis_syntax)

    spss.Submit("SAVE OUTFILE='COMPLETE_RECODED_BINARY.sav'\n/COMPRESSED.")

    odds, odds_errors = calculate_odds(prefixed, start, stop, prefix)
    name_parts = dict(version=version, cutmod=cut_mod)
    odds.to_csv('{version}_{cutmod}.csv'.format(**name_parts))
    odds_errors.to_csv('error_{version}_{cutmod}.csv'.format(**name_parts))

    spss.Submit("SAVE OUTFILE='%s'\n/COMPRESSED." % save_data)
def main():
    """Run the imputation pipeline and save the imputed dataset.

    Works inside the ``imputations`` folder.  Every syntax fragment is
    written to ``imputation_syntax`` (the first write truncates the file,
    later ones append) so the run can be inspected afterwards.
    """
    os.chdir(imputations)
    spssaux.OpenDataFile(gb.select_rev)

    setup_syntax = 'DATASET NAME orginal.\n'
    setup_syntax += 'cd "{cwd}".\n'.format(cwd=imputations)
    spss.SetOutput("off")
    setup_syntax += fix_value_lables()

    with open(imputation_syntax, 'w') as syntax_file:
        syntax_file.write(setup_syntax)
    spss.Submit(setup_syntax)

    impute_syntax = impute_item_for_item()
    with open(imputation_syntax, 'a') as syntax_file:
        syntax_file.write('\n' + impute_syntax)
    spss.Submit(impute_syntax)

    match_files()

    # NOTE(review): unlike the two fragments above, the recode syntax is
    # only written to the syntax file and never submitted - confirm that
    # this is intentional.
    recode_syntax = recode()
    with open(imputation_syntax, 'a') as syntax_file:
        syntax_file.write('\n' + recode_syntax)

    save_syntax = """SAVE OUTFILE = '{imputed_data}' /COMPRESSED.\n""".format(
        imputed_data=gb.imputed_data)
    spss.Submit(save_syntax)
def add_suffix(folder):
    """Append each file's base name as a suffix to its variable names.

    For every ``.sav`` file returned by ``get_filelist`` for
    ``folder + "/no_strings"`` and ``folder + "/strings"``, all variables
    except the id variable are renamed to ``<name>_<file base name>`` and
    the result is saved under the same file name in the ``suffix``
    sub-folder of that directory.

    Args:
        folder: Directory containing the ``no_strings`` and ``strings``
            sub-folders.
    """
    exclude = ['kod_id']  # name of the id variable; it is never renamed

    for item in ["/no_strings", "/strings"]:
        for path in get_filelist(folder + item, 'sav'):
            print(path)
            spssaux.OpenDataFile(path)

            # splitext removes only the extension.  The previous
            # str.strip('.sav') removed any leading/trailing '.', 's', 'a'
            # or 'v' characters and so mangled names such as "vas.sav".
            basename = os.path.splitext(os.path.basename(path))[0]
            suffix = basename  # desired suffix
            print(basename)

            to_rename = [name for name in spssaux.VariableDict().variables
                         if name not in exclude]
            oldnames = spssaux.VariableDict().expand(to_rename)
            newnames = [varnam + "_" + suffix for varnam in oldnames]

            # An empty "rename variables (=)." is invalid syntax, so skip it
            # when the file holds nothing but excluded variables.
            if oldnames:
                spss.Submit('rename variables (%s=%s).'
                            % ('\n'.join(oldnames), '\n'.join(newnames)))

            save_template = (
                'SAVE OUTFILE = "%s%s".\n'
                'DATASET CLOSE ALL.\n'
                'NEW FILE.\n'
            )
            spss.Submit(save_template
                        % (folder + item + '/suffix/', basename + '.sav'))
def fix_files(folder, datevars, id_variable, date):
    """Normalise every ``.sav`` file in ``folder`` to numeric variables.

    For each file: variables are renamed via ``rename_vars``, the id
    variable and every variable that is not a string variable, not in
    ``datevars`` and has no ``'mean'`` in its name is converted to ``f8``,
    right-aligned and set to scale level.  For all of those except the id
    variable, system-missing values are recoded to 999 and 999 is declared
    missing.  ``data_from_week`` is computed from the file's base name and
    the file is saved through ``save_and_close``.

    Args:
        folder: Directory that is searched for ``.sav`` files.
        datevars: Names of date variables that must keep their type.
        id_variable: Name of the case id variable; cases are sorted by it.
        date: Text inserted into ``DATE.MDY(...)`` as the reference date
            for ``time_days``.
    """
    datafiles = get_filelist(folder, "sav")
    for fil in datafiles:
        spssaux.OpenDataFile(fil)
        vars_in_file = spssaux.VariableDict().variables
        rename_vars(vars_in_file)
        spss.Submit("ALTER TYPE %s (f8)." % id_variable)

        stringvars = get_stringvars()
        print(stringvars)

        # Re-read the dictionary: rename_vars may have changed the names.
        vars_in_file = spssaux.VariableDict().variables

        # splitext copes with base names that contain dots; the previous
        # two-way unpacking of split('.') raised ValueError for them.
        base = os.path.splitext(os.path.basename(fil))[0]

        exclude = [var for var in stringvars if var in vars_in_file]

        has_answer_date = "AnswerDate" in vars_in_file
        if has_answer_date:
            spss.Submit(
                'COMPUTE time_days = DATEDIFF(AnswerDate, DATE.MDY(%s), "day").\n'
                'EXECUTE.\n' % date)

        # Built once per file instead of once per variable.
        keep_as_is = set(exclude) | set(datevars)
        for var in vars_in_file:
            if 'mean' in var or var in keep_as_is:
                continue
            spss.Submit("ALTER TYPE %s (f8)." % var)
            spss.Submit("VARIABLE ALIGNMENT %s(right)." % var)
            spss.Submit("VARIABLE LEVEL %s(scale)." % var)
            if var != id_variable:
                spss.Submit("RECODE %s (SYSMIS=999)." % var)
                spss.Submit("MISSING VALUES %s (999)." % var)

        spss.Submit("SORT CASES BY %s (A)." % id_variable)

        final_syntax = [
            "COMPUTE data_from_week=%s." % base,
            "EXECUTE.",
            "ALTER TYPE data_from_week (f8).",
        ]
        # time_days only exists when it was computed above or was already in
        # the file; altering a variable that does not exist is an SPSS error.
        if has_answer_date or "time_days" in vars_in_file:
            final_syntax.append("ALTER TYPE time_days (f8).")
        spss.Submit("\n".join(final_syntax))

        spss.Submit(save_and_close(base, exclude, folder))
        spss.Submit("NEW FILE.")
def main():
    """Open ``data`` and export the ``calculate_odds`` result to CSV.

    The table returned by ``calculate_odds(start, stop, prefix)`` is
    written to ``long_vars_ut.csv`` in the current working directory.
    SPSS output is left switched on.
    """
    spssaux.OpenDataFile(data)
    spss.Submit('SET TNumbers=Values ONumbers=Labels OVars=Labels.')

    odds_table = calculate_odds(start, stop, prefix)
    odds_table.to_csv('long_vars_ut.csv')
"""
Checks availability of variables in dataset and makes new dataset from
selection variable and list of variables.
"""
import time

# Taken before the remaining imports, as in the original script.
start_time = time.time()

from vars_and_functions import global_vars as gb
from vars_and_functions import reverse_items
import spss
import spssaux
import pandas as pd

log = gb.folder + 'out/check_variables_log.txt'
syntax_save = gb.folder + 'out/check_variables.sps'

spssaux.OpenDataFile(gb.org_data)
test = spssaux.VariableDict()
print(test)

vars = ['x4_6aevyt01', 'x4_6aevyt02']
for var in reversed(vars):
    i = test[var].index

    # Only one data cursor may be open at a time, so the cursor has to be
    # closed before the next iteration opens a new one.
    dataCursor = spss.Cursor([i])
    try:
        oneVar = dataCursor.fetchall()
    finally:
        dataCursor.close()

    # Extending the example to get the actual list of values: every fetched
    # row is a one-element tuple, de-duplicated through a set.
    uniqueList = list(set(oneVar))
    uniq_vals = [
        int(x[0]) for x in uniqueList
        if x[0] is not None and x[0] not in gb.missing_int
    ]
    # NOTE(review): uniq_vals comes from a set and is not sorted, so the
    # reversed order is arbitrary - confirm whether sorting was intended.
    uniq_vals_rev = reversed(uniq_vals)
import SpssClient
import spssaux
import spss
import csv
import re

SpssClient.StartClient()

# User defined variables
input_file = r'user_input_1'
output_file = r'user_input_2'
output_var_vals_file = r'user_input_3'
output_var_info_file = r'user_input_4'

# Load sav-file.
spssaux.OpenDataFile(input_file)

# Count number of columns and cases
varCount = spss.GetVariableCount()
caseCount = spss.GetCaseCount()
print('There are %d variables in this file' % varCount)
print('There are %d cases. Please check this matches number of cases in the output file' % spss.GetCaseCount())

# Clean file: only string columns are touched.
for ind in range(varCount):
    varName = spss.GetVariableName(ind)
    # GetVariableType returns 0 for numeric variables; skip those.
    if not spss.GetVariableType(ind) > 0:
        continue
    print("String Variable: %s" % varName)
    # For each code 0-32, replace the character produced by
    # STRING(code, PIB) with a blank.
    for ascii_ind in range(33):
        spss.Submit('compute %s = REPLACE(%s, STRING(%d,PIB), " ")' % (varName, varName, ascii_ind))
    spss.Submit('EXECUTE.')