def test_metadata_same(self):
    """Round-trip check: written file matches the source file.

    Both files must report utf_8 encoding and identical data
    dictionaries, and closing the readers must restore the locale.
    """
    settings = dict(ioUtf8=True, ioLocale=ioLocale)

    with sav.SavHeaderReader(in_savFileName, **settings) as header:
        metadata_in = header.dataDictionary()
        encoding_in = header.fileEncoding
    self.assertEqual("utf_8", encoding_in)

    with sav.SavHeaderReader(out_savFileName, **settings) as header:
        metadata_out = header.dataDictionary()
        encoding_out = header.fileEncoding
    self.assertEqual("utf_8", encoding_out)

    self.assertEqual(metadata_in, metadata_out)

    # check if the locale is reset
    self.assertEqual(locale.setlocale(locale.LC_CTYPE), currLocale)
def main():
    """Convert an SPSS .sav file into JSON files.

    Usage: ``prog inputfile [outputpath]``. Writes ``header.json`` (the
    data dictionary) plus ``data_00001.json``, ``data_00002.json``, ...
    chunks of at most MAX_ROWS rows each into *outputpath* (current
    directory when omitted). Exits with status 1 on bad usage or on any
    conversion error.
    """
    MAX_ROWS = 5000
    MAX_DECIMALS = 6  # kept for the disabled truncate_decimals step below
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('Usage: ' + sys.argv[0] + ' inputfile [outputpath]')
        sys.exit(1)
    if len(sys.argv) == 2:
        sys.argv.append('')  # default output path: current directory
    try:
        with savReaderWriter.SavHeaderReader(sys.argv[1], ioUtf8=False) as header:
            metadata = header.all()
            res = {
                'alignments': metadata.alignments,
                'columnWidths': metadata.columnWidths,
                'measureLevels': metadata.measureLevels,
                'varFormats': metadata.formats,
                'varTypes': metadata.varTypes,
                'varNames': metadata.varNames,
                'valueLabels': metadata.valueLabels,
                'varLabels': metadata.varLabels,
                # # Other values present in the header:
                # 'caseWeightVar': metadata.caseWeightVar,
                # 'fileAttributes': metadata.fileAttributes,
                # 'fileLabel': metadata.fileLabel,
                # 'missingValues': metadata.missingValues,
                # 'multRespDefs': metadata.multRespDefs,
                # 'varAttributes': metadata.varAttributes,
                # 'varRoles': metadata.varRoles,
                # 'varSets': metadata.varSets,
            }
            with open(os.path.join(sys.argv[2], 'header.json'), 'w') as h:
                h.write(json.dumps(convert_recursive(res), indent=4))
        with savReaderWriter.SavReader(sys.argv[1], ioUtf8=False) as reader:
            for i, lines in enumerate(chunks(reader, MAX_ROWS), 1):
                with open(
                        os.path.join(sys.argv[2],
                                     'data_' + str(i).zfill(5) + '.json'),
                        'w') as f:
                    encoded = convert_recursive(lines)
                    jsonText = json.dumps(encoded)
                    # jsonText = truncate_decimals(jsonText, MAX_DECIMALS)
                    f.write(jsonText)
        print('Files successfully created.')
        return
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt; only genuine errors should be reported here.
        print("Error: ", sys.exc_info())
        traceback.print_exc()
        sys.exit(1)
def test_raises_SPSSIOWarning(self):
    """Reading header info must emit an SPSS_NO_CASEWGT warning."""
    module = rw if sys.version_info[0] > 2 else rw.error
    SPSSIOWarning = module.SPSSIOWarning
    #with self.assertRaises(SPSSIOWarning) as e:
    with warnings.catch_warnings(record=True) as caught:
        with rw.SavHeaderReader(self.savFileName) as header:
            metadata = str(header)  # triggers the warning
    newest = caught[-1]
    self.assertTrue(issubclass(newest.category, UserWarning))
    self.assertTrue("SPSS_NO_CASEWGT" in str(newest.message))
def test_accented_varSet_codepage_mode():
    """An accented variable-set name survives a codepage-mode round trip."""
    with srw.SavWriter(**kwargs) as writer:
        for _ in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"]) as header:
        actual = header.varSets
    desired = {b'\xc3\xbcberhaupt': [b'salbegin', b'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
def test_accented_varSet_unicode_mode():
    """An accented variable-set name survives a unicode-mode round trip."""
    kwargs["varSets"] = {u'\xfcberhaupt': varNames}
    kwargs["ioUtf8"] = True
    with srw.SavWriter(**kwargs) as writer:
        for _ in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"], ioUtf8=True) as header:
        actual = header.varSets
    desired = {u'\xfcberhaupt': [u'salbegin', u'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
def spss_to_csv(input_filepath, output_filepath, upsample_size=100000,
                w_col='Weight'):
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    # Collect value labels and missing-value definitions from the header.
    with srw.SavHeaderReader(input_filepath, ioUtf8=True) as header:
        metadata = header.all()
    value_replacements = metadata.valueLabels
    nan_replacements = dict()
    for var, missing in metadata.missingValues.items():
        if missing and 'values' in missing:
            spec = missing['values']
            if not isinstance(spec, list):
                spec = [spec]
            nan_replacements[var] = {nan_val: np.nan for nan_val in spec}
    questions = metadata.varLabels

    # Load the records and apply the label / missing-value replacements.
    with srw.SavReader(input_filepath, ioUtf8=True) as reader:
        column_names = reader.header
        records = reader.all()
    df = pd.DataFrame(records, columns=column_names)
    df.replace(value_replacements, inplace=True)
    df.replace(nan_replacements, inplace=True)
    df.to_csv(output_filepath)

    # Variable labels (questions) go to a sibling .json file.
    questions_file = Path(output_filepath).with_suffix('.json')
    with open(questions_file, 'w') as qf:
        json.dump(questions, qf)

    # upsample the survey for a "representative" sample for analysis
    # if specified
    if upsample_size and upsample_size > 0 and w_col in df.columns:
        rng = np.random.RandomState(12345)
        smpl = rng.choice(df.index, upsample_size,
                          p=df[w_col] / df[w_col].sum())
        df_resampled = df.loc[smpl, :]
        output_path = Path(output_filepath)
        df_resampled.to_csv(
            output_path.with_name(output_path.stem + '_upsampled.csv'))
currLocale = locale.setlocale(locale.LC_CTYPE) # Demonstrates use of ioUtf8=UNICODE_BMODE, or ioUtf8=2. # This is regular unicode mode (ioUtf8=UNICODE_BMODE, or ioUtf8=1, # or ioUtf8=True), but data will be returned as bytes. in_savFileName = "test_data/metadata_copy_test.sav" is_windows = sys.platform.startswith("win") ioLocale = "german" if is_windows else "de_DE.cp1252" b_settings = dict(ioUtf8=sav.UNICODE_BMODE, ioLocale=ioLocale) # read SPSS file data with sav.SavReader(in_savFileName, rawMode=True, **b_settings) as data: in_records = data.all(False) # read SPSS file metadata with sav.SavHeaderReader(in_savFileName, **b_settings) as header: metadata = header.dataDictionary() #pprint(metadata) # write (unmodified) data to SPSS file out_savFileName = os.path.join(tempfile.gettempdir(), 'out.sav') metadata.update(b_settings) with sav.SavWriter(out_savFileName, **metadata) as writer: writer.writerows(in_records) # Now test whether input and output are the same class Test_MetadataRoundTrip(unittest.TestCase): def setUp(self): self.maxDiff = None
def get_metadata(savFilename):
    """Gets variable names (list), variable types and formats (dict)"""
    with savReaderWriter.SavHeaderReader(savFilename) as header:
        return header.varNames, header.varTypes, header.formats
def leervaloresVariables():
    """Return the value labels of the SPSS file (``archivo``).

    Uses a context manager so the underlying .sav handle is closed; the
    previous version created the SavHeaderReader without ever closing it,
    leaking the file handle (the sibling readers all use ``with``).
    """
    with savReaderWriter.SavHeaderReader(
            archivo, ioUtf8=True, ioLocale='Spanish_Spain.1252') as header:
        return header.valueLabels
def leerrotulos():
    """Return the variable labels of the SPSS file (``archivo``)."""
    reader = savReaderWriter.SavHeaderReader(archivo,
                                             ioLocale='Spanish_Spain.1252')
    with reader as header:
        return header.varLabels
def leercabezotes():
    """Return the variable names of the SPSS file (``archivo``)."""
    reader = savReaderWriter.SavHeaderReader(archivo,
                                             ioLocale='Spanish_Spain.1252')
    with reader as header:
        return header.varNames
def leervaloresVariables():
    """Return the (variable, value-labels) items of the SPSS file."""
    with savReaderWriter.SavHeaderReader(
            archivo, ioLocale='Spanish_Spain.1252') as header:
        labels = header.valueLabels
        return labels.items()
# Build a NumPy structured dtype from the SPSS variable types: a positive
# varTypes entry is a string variable of that byte width ("S<n>"), zero
# means numeric (float64).
formats = [
    "S%d" % data.varTypes[v] if data.varTypes[v] else np.float64
    for v in data.varNames
]
dtype = np.dtype({'names': data.varNames, 'formats': formats})
structured_array = np.array([tuple(record) for record in records],
                            dtype=dtype)
allDataArray = np.array(
    records
)  # in the most recent version one can directly read to numpy arrays
print(records)

# reading metadata from SPSS file
with sav.SavHeaderReader(spss_file, ioUtf8=True, ioLocale=ioLocale) as header:
    metadata = header.dataDictionary(
        asNamedtuple=False)  # Why does this take so long?
pprint.pprint(metadata)

# writing unmodified data: the data dictionary doubles as SavWriter kwargs
with sav.SavWriter(spss_file_out,
                   overwrite=True,
                   ioUtf8=True,
                   ioLocale=ioLocale,
                   mode=b'wb',
                   refSavFileName=None,
                   **metadata) as writer:
    # NOTE(review): the loop index ``i`` is unused.
    for i, record in enumerate(structured_array):
        writer.writerow(record)
def extract_sav_meta(sav_file, name="", data=None, ioLocale='en_US.UTF-8',
                     ioUtf8=True):
    """Extract quantipy-style meta (and adjusted data) from a .sav file.

    see parse_sav_file doc
    """
    with sr.SavHeaderReader(sav_file, ioLocale=ioLocale, ioUtf8=ioUtf8) as header:
        # Metadata Attributes
        # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
        # 'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
        # 'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
        # 'fileLabel', 'missingValues']
        metadata = header.dataDictionary(True)

    meta = start_meta(name=name)
    meta['info']['text'] = 'Converted from SAV file %s.' % (name)
    meta['info']['from_source'] = {'pandas_reader':'sav'}
    meta['sets']['data file']['items'] = [
        'columns@%s' % (varName) for varName in metadata.varNames]

    # This should probably be somewhere in the metadata
    # weight_variable_name = metadata.caseWeightVar

    # Descriptions of attributes in metadata are located here:
    # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
    for column in metadata.varNames:
        meta['columns'][column] = {}

        if column in metadata.valueLabels:
            # ValueLabels is type = 'single' (possibly 1-1 map)
            meta['columns'][column]['values'] = []
            meta['columns'][column]['type'] = "single"
            for value, text in metadata.valueLabels[column].iteritems():
                values = {'text': {'main': unicode(text)},
                          'value': unicode(int(value))}
                meta['columns'][column]['values'].append(values)
        else:
            if column in metadata.formats:
                # An SPSS format with a '.' (e.g. F8.2) carries decimals.
                f = metadata.formats[column]
                if '.' in f:
                    meta['columns'][column]['type'] = "float"
                else:
                    meta['columns'][column]['type'] = "int"
            else:
                # Infer meta from data
                if data is not None:
                    # print "VAR '{}' NOT IN value_labels".format(column)
                    column_values = data[column].dropna()
                    if len(column_values) > 0:
                        # Get the first "not nan" value from the column
                        value = column_values.values[0]
                        if isinstance(value, pd.np.float64):
                            # Float AND Int because savReaderWriter loads them both as float64
                            meta['columns'][column]['text'] = {'main': [column]}
                            meta['columns'][column]['type'] = "float"
                            if (data[column].dropna() % 1).sum() == 0:
                                if (data[column].dropna() % 1).unique() == [0]:
                                    try:
                                        data[column] = data[column].astype('int')
                                    except:
                                        pass
                                    meta['columns'][column]['type'] = "int"
                        elif isinstance(value, unicode) or isinstance(value, str):
                            # Strings
                            meta['columns'][column]['text'] = {'main': [column]}
                            meta['columns'][column]['type'] = "string"

        # Placeholders for metadata attributes that are not converted yet.
        if column in metadata.varTypes:
            pass
        if column in metadata.varSets:
            pass
        if column in metadata.varAttributes:
            pass
        if column in metadata.varRoles:
            pass
        if column in metadata.measureLevels:
            pass

        # Some labels are empty strings.
        if column in metadata.varLabels:
            meta['columns'][column]['text'] = {'main': metadata.varLabels[column]}

    for mrset in metadata.multRespDefs:
        # meta['masks'][mrset] = {}
        # 'D' is "multiple dichotomy sets" in SPSS
        # 'C' is "multiple category sets" in SPSS
        if metadata.multRespDefs[mrset]['setType'] == 'C':
            'C'
            # meta['masks'][mrset]['type'] = "categorical set"
        elif metadata.multRespDefs[mrset]['setType'] == 'D':
            'D'
            # meta['masks'][mrset]['type'] = "dichotomous set"
            # meta['masks'][mrset]['countedValue'] = metadata.multRespDefs[mrset]['countedValue']
            varNames = metadata.multRespDefs[mrset]['varNames']
            # meta, data[mrset] = delimited_from_dichotomous(meta, data[varNames], mrset)
            # Collapse the dichotomous columns into one delimited-set column.
            data[mrset] = condense_dichotomous_set(data[varNames],
                                                   values_from_labels=False)
            meta['columns'][mrset] = {
                'type': 'delimited set',
                'text': {'main': metadata.multRespDefs[mrset]['label']},
                'values': [
                    {'text': {'main': metadata.varLabels[varName]},
                     'value': v}
                    for v, varName in enumerate(varNames, start=1)]
            }
            # Replace the dichotomous columns with the new set in the
            # 'data file' item list, at the position of the first column.
            idx = meta['sets']['data file']['items'].index('columns@%s' % (varNames[0]))
            items = meta['sets']['data file']['items']
            meta['sets']['data file']['items'] = items[:idx] + ['columns@%s' % (mrset)] + items[idx+len(varNames):]
        # meta['masks'][mrset]['text'] = [metadata.multRespDefs[mrset]['label']]
        # meta['masks'][mrset]['items'] = []
        # for var_name in metadata.multRespDefs[mrset]['varNames']:
        #     meta['masks'][mrset]['items'].append({'source':"columns@{0}".format(var_name)})
        # df = make_delimited_from_dichotmous(data[common_vars[var]])
    return meta, data
def extract_sav_meta(sav_file, name="", data=None, ioLocale='en_US.UTF-8',
                     ioUtf8=True, dichot=None, dates_as_strings=False,
                     text_key="en-GB", engine='savReaderWriter'):
    """Extract quantipy-style meta (and adjusted data) from a .sav file,
    using either the 'readstat' (pyreadstat) or 'savReaderWriter' engine."""
    if engine == 'readstat':
        df, metadata = pyreadstat.read_sav(sav_file,
                                           encoding=ioLocale.split(".")[-1],
                                           metadataonly=True)
        meta = start_meta(text_key=text_key)
        meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
        meta['info']['from_source'] = {'pandas_reader': 'sav'}
        meta['sets']['data file']['items'] = [
            'columns@{}'.format(varName) for varName in metadata.column_names
        ]
        for index, column in enumerate(metadata.column_names):
            meta['columns'][column] = {}
            meta['columns'][column]['name'] = column
            meta['columns'][column]['parent'] = {}
            if column in metadata.variable_value_labels:
                # Value labels present -> treated as a 'single' variable.
                meta['columns'][column]['values'] = []
                meta['columns'][column]['type'] = "single"
                for value, text in metadata.variable_value_labels[
                        column].items():
                    values = {
                        'text': {
                            text_key: str(text)
                        },
                        'value': int(value)
                    }
                    meta['columns'][column]['values'].append(values)
                # if user has stored single answer data as a string rather than number
                # we convert it to floats and store non convertables as nan (with coerce)
                if column in data.columns and data[column].dtype == 'O':
                    data[column] = pd.to_numeric(data[column],
                                                 errors='coerce',
                                                 downcast='float')
            else:
                if column in metadata.original_variable_types:
                    f = metadata.original_variable_types[column]
                    if 'DATETIME' in f:
                        if dates_as_strings:
                            # DATETIME fields from SPSS are currently
                            # being read in as strings because there's an
                            # as-yet undetermined discrepancy between the
                            # input and output dates if datetime64 is used
                            meta['columns'][column]['type'] = 'string'
                        else:
                            meta['columns'][column]['type'] = 'date'
                            data[column] = pd.to_datetime(data[column])
                    elif f.startswith('A'):
                        # 'A' formats are SPSS string variables.
                        meta['columns'][column]['type'] = 'string'
                    elif '.' in f:
                        meta['columns'][column]['type'] = "float"
                    else:
                        meta['columns'][column]['type'] = "int"
            # add the variable label to the meta
            meta['columns'][column]['text'] = {
                text_key: metadata.column_labels[index]
            }
        return meta, data
    elif engine == 'savReaderWriter':
        if dichot is None:
            dichot = {'yes': 1, 'no': 0}
        """ see parse_sav_file doc """
        with sr.SavHeaderReader(sav_file, ioLocale=ioLocale,
                                ioUtf8=ioUtf8) as header:
            # Metadata Attributes
            # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
            # 'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
            # 'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
            # 'fileLabel', 'missingValues']
            metadata = header.dataDictionary(True)
        meta = start_meta(text_key=text_key)
        meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
        meta['info']['from_source'] = {'pandas_reader': 'sav'}
        meta['sets']['data file']['items'] = [
            'columns@{}'.format(varName) for varName in metadata.varNames
        ]
        # This should probably be somewhere in the metadata
        # weight_variable_name = metadata.caseWeightVar
        # Descriptions of attributes in metadata are located here:
        # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
        for column in metadata.varNames:
            meta['columns'][column] = {}
            meta['columns'][column]['name'] = column
            meta['columns'][column]['parent'] = {}
            if column in metadata.valueLabels:
                # ValueLabels is type = 'single' (possibly 1-1 map)
                meta['columns'][column]['values'] = []
                meta['columns'][column]['type'] = "single"
                for value, text in metadata.valueLabels[column].items():
                    values = {
                        'text': {
                            text_key: str(text)
                        },
                        'value': int(value)
                    }
                    meta['columns'][column]['values'].append(values)
            else:
                if column in metadata.formats:
                    f = metadata.formats[column]
                    if 'DATETIME' in f:
                        if dates_as_strings:
                            # DATETIME fields from SPSS are currently
                            # being read in as strings because there's an
                            # as-yet undetermined discrepancy between the
                            # input and output dates if datetime64 is used
                            meta['columns'][column]['type'] = 'string'
                        else:
                            meta['columns'][column]['type'] = 'date'
                            data[column] = pd.to_datetime(data[column])
                    elif f.startswith('A'):
                        meta['columns'][column]['type'] = 'string'
                    elif '.' in f:
                        meta['columns'][column]['type'] = "float"
                    else:
                        meta['columns'][column]['type'] = "int"
                else:
                    # Infer meta from data
                    if data is not None:
                        # print "VAR '{}' NOT IN value_labels".format(column)
                        column_values = data[column].dropna()
                        if len(column_values) > 0:
                            # Get the first "not nan" value from the column
                            value = column_values.values[0]
                            if isinstance(value, pd.np.float64):
                                # Float AND Int because savReaderWriter loads them both as float64
                                meta['columns'][column]['text'] = {
                                    text_key: [column]
                                }
                                meta['columns'][column]['type'] = "float"
                                if (data[column].dropna() % 1).sum() == 0:
                                    if (data[column].dropna() % 1).unique() == [0]:
                                        try:
                                            data[column] = data[column].astype(
                                                'int')
                                        except:
                                            pass
                                        meta['columns'][column]['type'] = "int"
                            # NOTE(review): both isinstance checks test ``str``;
                            # the second was presumably ``unicode`` before a
                            # Python 2 -> 3 conversion.
                            elif isinstance(value, str) or isinstance(
                                    value, str):
                                # Strings
                                meta['columns'][column]['text'] = {
                                    text_key: [column]
                                }
                                meta['columns'][column]['type'] = "string"
            # Placeholders for metadata attributes that are not converted yet.
            if column in metadata.varTypes:
                pass
            if column in metadata.varSets:
                pass
            if column in metadata.varAttributes:
                pass
            if column in metadata.varRoles:
                pass
            if column in metadata.measureLevels:
                pass
            # Some labels are empty strings.
            if column in metadata.varLabels:
                meta['columns'][column]['text'] = {
                    text_key: metadata.varLabels[column]
                }
        for mrset in metadata.multRespDefs:
            # meta['masks'][mrset] = {}
            # 'D' is "multiple dichotomy sets" in SPSS
            # 'C' is "multiple category sets" in SPSS
            varNames = list(metadata.multRespDefs[mrset]['varNames'])
            # Find the index where the delimited set should be inserted
            # into data, which is immediately prior to the start of the
            # dichotomous set columns
            dls_idx = data.columns.tolist().index(varNames[0])
            if metadata.multRespDefs[mrset]['setType'] == 'C':
                # Raise if value object of columns is not equal
                if not all(meta['columns'][v]['values'] == meta['columns'][
                        varNames[0]]['values'] for v in varNames):
                    msg = 'Columns must have equal values to be combined in a set: {}'
                    raise ValueError(msg.format(varNames))
                # Concatenate columns to set
                df_str = data[varNames].astype('str')
                dls = df_str.apply(lambda x: ';'.join([
                    v.replace('.0', '') for v in x.tolist()
                    if not v in ['nan', 'None']
                ]), axis=1) + ';'
                dls.replace({';': np.NaN}, inplace=True)
                # Get value object
                values = meta['columns'][varNames[0]]['values']
            elif metadata.multRespDefs[mrset]['setType'] == 'D':
                # Generate the delimited set from the dichotomous set
                dls = condense_dichotomous_set(data[varNames],
                                               values_from_labels=False,
                                               **dichot)
                # Get value object
                values = [{
                    'text': {
                        text_key: metadata.varLabels[varName]
                    },
                    'value': int(v)
                } for v, varName in enumerate(varNames, start=1)]
            else:
                continue
            # Insert the delimited set into data
            data.insert(dls_idx, mrset, dls)
            # Generate the column meta for the new delimited set
            meta['columns'][mrset] = {
                'name': mrset,
                'type': 'delimited set',
                'text': {
                    text_key: metadata.multRespDefs[mrset]['label']
                },
                'parent': {},
                'values': values
            }
            # Add the new delimited set to the 'data file' set
            df_items = meta['sets']['data file']['items']
            df_items.insert(df_items.index('columns@{}'.format(varNames[0])),
                            'columns@{}'.format(mrset))
            # Drop the source dichotomous columns from data and meta.
            data = data.drop(varNames, axis=1)
            for varName in varNames:
                df_items.remove('columns@{}'.format(varName))
                del meta['columns'][varName]
        return meta, data
def extract_sav_meta(sav_file, name="", data=None, ioLocale='en_US.UTF-8',
                     ioUtf8=True, dichot=None, dates_as_strings=False,
                     text_key="main"):
    """Extract quantipy-style meta (and adjusted data) from a .sav file."""
    if dichot is None:
        dichot = {'yes': 1, 'no': 0}
    """ see parse_sav_file doc """
    with sr.SavHeaderReader(sav_file, ioLocale=ioLocale,
                            ioUtf8=ioUtf8) as header:
        # Metadata Attributes
        # ['valueLabels', 'varTypes', 'varSets', 'varAttributes', 'varRoles',
        # 'measureLevels', 'caseWeightVar', 'varNames', 'varLabels', 'formats',
        # 'multRespDefs', 'columnWidths', 'fileAttributes', 'alignments',
        # 'fileLabel', 'missingValues']
        metadata = header.dataDictionary(True)
    meta = start_meta(text_key=text_key)
    meta['info']['text'] = 'Converted from SAV file {}.'.format(name)
    meta['info']['from_source'] = {'pandas_reader': 'sav'}
    meta['sets']['data file']['items'] = [
        'columns@{}'.format(varName) for varName in metadata.varNames
    ]
    # This should probably be somewhere in the metadata
    # weight_variable_name = metadata.caseWeightVar
    # Descriptions of attributes in metadata are located here:
    # http://pythonhosted.org/savReaderWriter/#savwriter-write-spss-system-files
    for column in metadata.varNames:
        meta['columns'][column] = {}
        meta['columns'][column]['name'] = column
        meta['columns'][column]['parent'] = {}
        if column in metadata.valueLabels:
            # ValueLabels is type = 'single' (possibly 1-1 map)
            meta['columns'][column]['values'] = []
            meta['columns'][column]['type'] = "single"
            for value, text in metadata.valueLabels[column].iteritems():
                values = {
                    'text': {
                        text_key: unicode(text)
                    },
                    'value': int(value)
                }
                meta['columns'][column]['values'].append(values)
        else:
            if column in metadata.formats:
                f = metadata.formats[column]
                if 'DATETIME' in f:
                    if dates_as_strings:
                        # DATETIME fields from SPSS are currently
                        # being read in as strings because there's an
                        # as-yet undetermined discrepancy between the
                        # input and output dates if datetime64 is used
                        meta['columns'][column]['type'] = 'string'
                    else:
                        meta['columns'][column]['type'] = 'date'
                        data[column] = pd.to_datetime(data[column])
                elif f.startswith('A'):
                    # 'A' formats are SPSS string variables.
                    meta['columns'][column]['type'] = 'string'
                elif '.' in f:
                    meta['columns'][column]['type'] = "float"
                else:
                    meta['columns'][column]['type'] = "int"
            else:
                # Infer meta from data
                if data is not None:
                    # print "VAR '{}' NOT IN value_labels".format(column)
                    column_values = data[column].dropna()
                    if len(column_values) > 0:
                        # Get the first "not nan" value from the column
                        value = column_values.values[0]
                        if isinstance(value, pd.np.float64):
                            # Float AND Int because savReaderWriter loads them both as float64
                            meta['columns'][column]['text'] = {
                                text_key: [column]
                            }
                            meta['columns'][column]['type'] = "float"
                            if (data[column].dropna() % 1).sum() == 0:
                                if (data[column].dropna() % 1).unique() == [0]:
                                    try:
                                        data[column] = data[column].astype(
                                            'int')
                                    except:
                                        pass
                                    meta['columns'][column]['type'] = "int"
                        elif isinstance(value, unicode) or isinstance(
                                value, str):
                            # Strings
                            meta['columns'][column]['text'] = {
                                text_key: [column]
                            }
                            meta['columns'][column]['type'] = "string"
        # Placeholders for metadata attributes that are not converted yet.
        if column in metadata.varTypes:
            pass
        if column in metadata.varSets:
            pass
        if column in metadata.varAttributes:
            pass
        if column in metadata.varRoles:
            pass
        if column in metadata.measureLevels:
            pass
        # Some labels are empty strings.
        if column in metadata.varLabels:
            meta['columns'][column]['text'] = {
                text_key: metadata.varLabels[column]
            }
    for mrset in metadata.multRespDefs:
        # meta['masks'][mrset] = {}
        # 'D' is "multiple dichotomy sets" in SPSS
        # 'C' is "multiple category sets" in SPSS
        if metadata.multRespDefs[mrset]['setType'] == 'C':
            'C'
            # meta['masks'][mrset]['type'] = "categorical set"
        elif metadata.multRespDefs[mrset]['setType'] == 'D':
            'D'
            varNames = metadata.multRespDefs[mrset]['varNames']
            # Find the index where the delimited set should be inserted
            # into data, which is immediately prior to the start of the
            # dichotomous set columns
            dls_idx = data.columns.tolist().index(varNames[0])
            # Generate the delimited set from the dichotomous set
            dls = condense_dichotomous_set(data[varNames],
                                           values_from_labels=False,
                                           **dichot)
            # Insert the delimited set into data
            data.insert(dls_idx, mrset, dls)
            # Generate the column meta for the new delimited set
            meta['columns'][mrset] = {
                'name': mrset,
                'type': 'delimited set',
                'text': {
                    text_key: metadata.multRespDefs[mrset]['label']
                },
                'parent': {},
                'values': [{
                    'text': {
                        text_key: metadata.varLabels[varName]
                    },
                    'value': int(v)
                } for v, varName in enumerate(varNames, start=1)]
            }
            # Add the new delimited set to the 'data file' set
            df_items = meta['sets']['data file']['items']
            df_items.insert(df_items.index('columns@{}'.format(varNames[0])),
                            'columns@{}'.format(mrset))
            # Drop the source dichotomous columns from data and meta.
            data = data.drop(varNames, axis=1)
            for varName in varNames:
                df_items.remove('columns@{}'.format(varName))
                del meta['columns'][varName]
    return meta, data
# #-------- dbspss = "C:/Users/yoni/Desktop/IMO Nfi Entry/spss iom/TestSpss.sav" # cnx = mysql.connector.connect(user='******', password='', host='127.0.0.1', database='pty_test') #open MySql database cursor = cnx.cursor() #load cursor cursorUPD = cnx.cursor() #load cursor other cursor for update the record cursor.execute('SELECT * FROM SPSST') #run sql query and put it on cursor #cursor.execute('SELECT concat("b'",name,"'") FROM spss_val WHERE 1 ORDER BY concat("b'",name,"'") ASC') remaining_rows = cursor.fetchall( ) #fetch all every selected rows and put it on cursor with savReaderWriter.SavHeaderReader( dbspss) as header: #open spss file for collecting meta data metadata = header.all() #get all meta data from spss # the following array declaration help to create [[],[],.....] myouter = [] #declare array for outer array myinner = [] #declare array for inner element # the following nested loop used to create array structor for r in remaining_rows: #loop itteration for every database selected rows myinner = [] #this piece of code help us to clear existion value for i in r: #itteration for every data columen of databse if isinstance(i, str): myinner.append( i.encode('UTF-8') ) #every row/column data converted to bytes which is allow us to insert on spss file and append on inner array elif i == None: #i replace a null value to empity string and append it on the array
writer = csv.writer(open('EMX_attributes.csv', 'wb'), delimiter=',') writer.writerow(['name'] + ['label'] + ['description'] + ['entity'] + ['dataType'] + ['refEntity'] + ['nillable'] + ['idAttribute']) #For each file in filePath, check if file exists and ends with the SPSS file extension. for file in os.listdir(filePath): file = filePath + file if os.path.isfile(file) and file.endswith('.sav'): #Read SPSS file and get header + meta data with savReaderWriter.SavReader(file) as reader: columns = reader.header with savReaderWriter.SavHeaderReader(file) as header: valueLabels = header.all().valueLabels #only categoricals varLabels = header.all().varLabels #description measureLevels = header.all().measureLevels #all variables #Store data for the first two columns of EMX attributes for column in columns: labelColumn = column.lower().replace('_', ' ') column = column.replace(' ', '_') if len(column) > 30: print column + " has more than 30 characters!" columnList.append(column) labelColumnList.append(labelColumn)