def test_writerows_pd_np_issue63():
    """Regression test for issue #63.

    "ufunc 'isnan' not supported for the input types" -- triggered by
    string columns that contained NaN values.
    """
    if skip:
        raise SkipTest
    csv_buffer = StringIO(u"n1,n2,s1,s2\n1,1,a,a\n2,2,b,bb\n3,,c,")
    desired = [[1.0, 1.0, b'a', b'a'],
               [2.0, 2.0, b'b', b'bb'],
               [3.0, None, b'c', b'']]
    df = pd.read_csv(csv_buffer, chunksize=10 ** 6, sep=',').get_chunk()
    savFileName = join(gettempdir(), "check.sav")
    kwargs = dict(varNames=list(df),
                  varTypes=dict(n1=0, n2=0, s1=1, s2=2),
                  savFileName=savFileName,
                  ioUtf8=True)
    # Write once from the raw numpy array, once from the DataFrame itself;
    # both must round-trip to the same records.
    for source in (df.values, df):
        with srw.SavWriter(**kwargs) as writer:
            writer.writerows(source)
        with srw.SavReader(savFileName) as reader:
            actual = reader.all(False)
        assert actual == desired, actual
def main():
    """Convert a JSON header/data file pair into an SPSS .sav file.

    Usage: <prog> headfile datafile outputfile

    Reads the variable metadata from ``headfile``, streams records from the
    ``datafile_*.json`` chunks via ijson, writes them to ``outputfile``, and
    deletes the JSON inputs on success.  Exits with status 1 on any error.
    """
    if len(sys.argv) != 4:
        print('Usage: ' + sys.argv[0] + ' headfile datafile outputfile')
        os._exit(1)
    try:
        with open(sys.argv[1], 'r') as head_file:
            head = json.load(head_file)
        # valueLabels keys arrive as JSON strings; coerce numeric ones back
        head['valueLabels'] = int_keys(head['valueLabels'], head['varTypes'])
        data_path = replace_last(sys.argv[2], '.json', '_*.json')
        with savReaderWriter.SavWriter(sys.argv[3], head['varNames'],
                                       head['varTypes'], head['valueLabels'],
                                       head['varLabels'], head['varFormats'],
                                       None, head['measureLevels'],
                                       head['columnWidths'],
                                       head['alignments'],
                                       ioUtf8=True) as writer:
            # Stream records chunk file by chunk file to bound memory use
            for f in sorted(glob.glob(data_path)):
                with open(f, 'r') as data_file:
                    for record in ijson.items(data_file, 'item'):
                        writer.writerow(record)
        os.remove(sys.argv[1])
        os.remove(sys.argv[2])
        for f in glob.glob(data_path):
            os.remove(f)
        print('File ' + sys.argv[3] + ' successfully created.')
    # Bug fix: was a bare ``except:``, which also swallowed SystemExit and
    # KeyboardInterrupt.  Narrowed to Exception; error is still logged and
    # the process still exits non-zero.
    except Exception:
        print('Error: ', sys.exc_info())
        traceback.print_exc()
        os._exit(1)
def write_sav(path_sav, data, **kwargs):
    """
    Write the given records to a SAV file at path_sav.

    Using the various definitions indicated by the packed kwargs, write the
    given set of records to a SAV file at the location indicated by path_sav.
    For a full explanation of the kwargs used please see the savReaderWriter
    library documentation here:
    http://pythonhosted.org/savReaderWriter/

    Parameters
    ----------
    path_sav : str
        The full path, including extension, indicating where the output
        file should be saved.
    data : pandas.DataFrame
        The row data to be saved in the output SAV file.  NaN values are
        replaced with the SPSS system-missing value before writing.
        (The docstring previously documented a ``records`` parameter that
        does not exist; the actual parameter is ``data``.)
    **kwargs : various
        Remaining keyword arguments passed to savReaderWriter.SavWriter().

    Returns
    -------
    None
    """
    with srw.SavWriter(path_sav, ioUtf8=True, **kwargs) as writer:
        # Replace NaN with the writer's SYSMIS value so SPSS treats those
        # cells as system-missing rather than choking on NaN
        records = data.fillna(writer.sysmis).values.tolist()
        for record in records:
            writer.writerow(record)
def test_check_segfault_numeric(self):
    """An incorrect value-label spec must raise ctypes.ArgumentError, not segfault."""
    # string keys for a numeric variable are invalid on purpose
    bad_labels = {b"a_numeric": {b"1": b"male", b"2": b"female"}}
    with self.assertRaises(ctypes.ArgumentError):
        with rw.SavWriter(*self.args, valueLabels=bad_labels) as writer:
            writer.writerows(self.records)
def test_check_segfault_char(self):
    """An incorrect value-label spec must raise ctypes.ArgumentError, not segfault.

    c_char_p is wrapped in c_char_p3k in py3k.py, hence a separate test.
    """
    # numeric keys for a string variable are invalid on purpose
    bad_labels = {b"a_string": {1: b"male", 2: b"female"}}
    with self.assertRaises(ctypes.ArgumentError):
        with rw.SavWriter(*self.args, valueLabels=bad_labels) as writer:
            writer.writerows(self.records)
def test_writerows_tuple():
    """writerows() must accept a tuple of tuples and round-trip the data."""
    savFileName = "output_tuple.sav"
    rows = tuple(map(tuple, desired))
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(rows)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
def write_test(savFileName, nrows, ncols):
    """Write an nrows x ncols test .sav file of alternating columns.

    Even-indexed variables are 5-byte strings, odd-indexed ones numeric;
    every row contains the same (b'aaaaa', 0, b'aaaaa', 0, ...) record.

    Parameters
    ----------
    savFileName : str   output path of the .sav file
    nrows : int         number of identical records to write
    ncols : int         number of variables (expected to be even)
    """
    varNames = ['v_%s' % i for i in range(ncols)]
    record = [b'aaaaa', 0] * (int(ncols) // 2)
    # Bug fix: the width was previously computed with eval() on the code
    # string "5 if i % 2 == 0 else 0" -- an unnecessary (and unsafe-looking)
    # indirection.  The expression is now evaluated directly.
    varTypes = {varName: 5 if i % 2 == 0 else 0
                for i, varName in enumerate(varNames)}
    with rw.SavWriter(savFileName, varNames, varTypes) as writer:
        for i in range(nrows):
            writer.writerow(record)
def func(self, savFileName):
    # Write 0..9 to a fresh temp .sav file, read it back, and check the
    # round-trip (ints come back as floats) plus the file's existence.
    # NOTE(review): the savFileName parameter is never used -- the method
    # always writes to a new temp file; confirm whether that is intended.
    # NOTE(review): tempfile.mktemp is deprecated/race-prone; mkstemp or
    # NamedTemporaryFile would be safer if callers allow it.
    self.outfile = tempfile.mktemp(suffix="_out.sav")
    with rw.SavWriter(self.outfile, [b'v1'], {b'v1': 0}) as writer:
        for i in range(10):
            writer.writerow([i])
    with rw.SavReader(self.outfile) as reader:
        self.assertEqual(reader.all(), [[float(i)] for i in range(10)])
    self.assertTrue(os.path.exists(self.outfile))
def test_accented_varSet_codepage_mode():
    """An accented variable-set name must round-trip in codepage mode."""
    with srw.SavWriter(**kwargs) as writer:
        for _ in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"]) as header:
        actual = header.varSets
    # utf-8 bytes of u'\xfcberhaupt' in codepage mode
    desired = {b'\xc3\xbcberhaupt': [b'salbegin', b'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
def test_writerows_namedtuple():
    """writerows() must accept a list of namedtuples and round-trip the data."""
    Record = namedtuple("Record", args[0])
    rows = [Record._make(row) for row in desired]
    savFileName = "output_namedtuple.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(rows)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
def test_writerows_pandas():
    """writerows() must accept a pandas DataFrame, converting NaN to SYSMIS."""
    if skip:
        raise SkipTest
    frame = pd.DataFrame({"a": range(0, 20, 2), "b": range(1, 20, 2)})
    frame.loc[0, "a"] = np.nan  # NaN must round-trip as missing
    savFileName = "output_pd.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(frame)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
def test_accented_varSet_unicode_mode():
    """An accented variable-set name must round-trip in unicode (ioUtf8) mode."""
    kwargs.update(varSets={u'\xfcberhaupt': varNames}, ioUtf8=True)
    with srw.SavWriter(**kwargs) as writer:
        for _ in range(10):
            writer.writerow([1, 1])
    with srw.SavHeaderReader(kwargs["savFileName"], ioUtf8=True) as header:
        actual = header.varSets
    desired = {u'\xfcberhaupt': [u'salbegin', u'salary']}
    remove(kwargs["savFileName"])
    assert actual == desired, actual
def test_writerows_numpy():
    """writerows() must accept a 2-D float64 numpy array, converting NaN."""
    if skip:
        raise SkipTest
    # same values as np.array([range(10), range(10, 20)]).reshape(10, 2)
    array = np.arange(20, dtype=np.float64).reshape(10, 2)
    array[0, 0] = np.nan  # NaN must round-trip as missing
    savFileName = "output_np.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        writer.writerows(array)
    with srw.SavReader(savFileName) as reader:
        actual = reader.all(False)
    assert actual == desired, actual
def escrituraspss():
    """Write a small demo .sav file using the Spanish codepage locale."""
    savFileName = 'someFile.sav'
    varNames = ['var1', 'v2', 'v3']
    varTypes = {'var1': 5, 'v2': 0, 'v3': 0}  # var1: 5-byte string; rest numeric
    records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
    with savReaderWriter.SavWriter(savFileName, varNames, varTypes,
                                   ioLocale='Spanish_Spain.1252') as writer:
        for row in records:
            writer.writerow(row)
def extra_moc_plot(flux_input, lam_input, flux_moc, lam_moc, error_flux_input, chi_sqr):
    # Plot the observed SED against the MOCASSIN model and their difference.
    # NOTE(review): SavWriter is called positionally with data arrays where
    # the savReaderWriter API expects (savFileName, varNames, varTypes, ...),
    # and the returned writer object is discarded -- this call almost
    # certainly does not persist anything usable; verify against the
    # savReaderWriter documentation.
    srw.SavWriter('extra_moc_plot.sav',flux_input, lam_input, flux_moc, lam_moc, error_flux_input)
    #check input
    if min(lam_input) < 1:
        print('input fail2')
    if max(lam_input) > 45:
        print('input fail3')
    #restrict the maximum and minimum values to be that of the IR
    minlam = min(lam_input)
    maxlam = max(lam_input)
    index = []
    # NOTE(review): this loop iterates over the *values* of lam_moc and then
    # uses them as indices (lam_moc[i]); it also appends wavelength values,
    # not positions, to `index` -- likely a bug.  Presumably the intent was
    # a positional mask of wavelengths within [minlam, maxlam]; confirm.
    for i in lam_moc:
        if (lam_moc[i] >= minlam and lam_moc[i] <= maxlam):
            index.append(lam_moc[i])
    flux_moc = flux_moc[index]
    lam_moc = lam_moc[index]
    #interpolate and subtract the mocassin data from the input
    # scipy interp1d `kind` may be 'linear', 'nearest', 'zero', 'slinear',
    # 'quadratic', 'cubic', or an integer spline order; default 'linear'.
    interpfunc = interpolate.interp1d(lam_moc, flux_moc, kind='linear')
    flux_moc_int = interpfunc(lam_input)
    #idl equivalent: flux_moc_int = interpol(flux_moc, lam_moc, lam_input)
    #moc to many points.
    flux_subtracted = []
    # NOTE(review): same values-as-indices pattern as above, and
    # flux_input[i] minus the *whole* interpolated array appends an array
    # per iteration -- confirm intended semantics.
    for i in flux_input:
        flux_subtracted.append(flux_input[i] - flux_moc_int)
    #calc and print useful things (median/result/std_dev are computed but
    #currently unused locals)
    median = statistics.median(flux_subtracted)
    result =[np.mean(flux_subtracted),np.var(flux_subtracted),sci.skew(flux_subtracted),sci.kurtosis(flux_subtracted)]
    std_dev = np.std(flux_subtracted)
    # the chi_sqr parameter is overwritten here; 10 is presumably the
    # degrees-of-freedom argument of chi_squared -- TODO confirm
    chi_sqr = chi_squared(flux_input, flux_moc_int, error_flux_input, 10)
    plt.subplot(221)
    plt.gca().set_color_cycle(['red', 'black', 'green', 'blue','green', 'blue', 'black'])
    #plot and stuff x, y and plot a line at y = 0 for reference
    plt.plot(lam_input, flux_subtracted,'D', [.1,100], [0,0], linewidth = 1)
    # NOTE(review): this *assigns* a string to plt.title, clobbering the
    # pyplot title() function for the rest of the process;
    # plt.title('Real - Moc') was probably intended.
    plt.title = 'Real - Moc'
    plt.errorbar(lam_input, flux_subtracted, error_flux_input, 3)
    plt.subplot(222)
    plt.plot(lam_input, flux_input, '^', lam_moc, flux_moc, 'D',[.1, 100], [0, 0], linestyle = 1)
    plt.subplot(223)
    plt.plot(lam_input, flux_input, 's', lam_moc, flux_moc, 'D', [.1, 100], [0, 0], linestyle = 1)
def writeSav(df, fname):
    """Write a pandas DataFrame to an SPSS .sav file.

    String columns have nulls filled with '' (SavWriter needs len() of every
    cell to size the variable) and are byte-encoded; numeric columns are
    written as SPSS numerics (type 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Data to write.  Mutated in place (string columns are filled/encoded).
    fname : str
        Output .sav path.
    """
    varNames = df.columns
    varTypes = {}
    for c in df.columns:
        if is_string_dtype(df[c]):
            # Currently falls over if null/float values as it tries to get
            # len, so fill nulls first
            df[c].fillna('', inplace=True)
            # Everything must be bytes for savReaderWriter.
            # Bug fix: the original conditional lambda had two byte-identical
            # branches (str(elt).encode() either way); collapsed to one.
            df[c] = df[c].apply(lambda elt: str(elt).encode())
            d = df[c].map(len).max()
        elif is_numeric_dtype(df[c]):
            d = 0
        else:
            # Bug fix: a column that was neither string nor numeric used to
            # silently reuse the previous column's width (or raise NameError
            # on the first column).  Write such columns as numeric instead.
            d = 0
        varTypes[c] = d
    with spss.SavWriter(fname, varNames, varTypes, ioUtf8=False) as writer:
        writer.writerows(df)
def mocassin_fail_amiy(j, username, diffuse, directoryname, outfoldername, starname):
    """Archive the output files of a failed MOCASSIN run.

    Copies the run's output (dust grid, run info, SED, tau files, input)
    into a new '<id>_<starname>_FAIlED' directory under output/SN or
    output/RSG, and bumps the persisted AMIY run counter.

    Parameters
    ----------
    j : int               zero-based line index into AMIY_input.txt
    username : str        macOS user owning ~/mocassin-rw_changes
    diffuse : sequence    truthy entry => supernova ('SN'), else 'RSG'
    directoryname, outfoldername : str   rebound locally (in/out by convention)
    starname : str        star identifier used in the archive name
    """
    base = '/Users/' + username + '/mocassin-rw_changes/'
    print("RUN FAILED! Writing output.")
    # Bug fix: the original concatenated str + int ("..." + (j + 1)) which
    # raises TypeError; the index is now converted explicitly.
    print("Failed on line number " + str(j + 1) + " of AMIY_input.txt")
    with srw.SavReader(base + 'AMIY_number.sav') as reader:
        AMIY_number = reader.next()
    id = ssi(AMIY_number)
    AMIY_number += 1
    # NOTE(review): SavWriter is constructed positionally with the counter
    # where the API expects varNames, and is not used as a context manager --
    # this almost certainly does not persist the incremented counter; verify
    # against the savReaderWriter API.
    srw.SavWriter(base + 'AMIY_number.sav', AMIY_number)
    # local renamed from `type` (shadowed the builtin)
    run_type = 'SN' if diffuse[j] else 'RSG'
    directoryname = (base + 'output/' + run_type + '/' + id + '_' +
                     starname + '_FAIlED')
    os.system("mkdir " + directoryname)
    outfoldername = run_type + "/" + id + '_' + starname + '_FAIlED'
    os.chdir(base + 'output')
    os.system('cp dustGrid.out ' + directoryname + '/dustGrid_' + id + '.out.txt')
    os.system('cp runinfo.txt ' + directoryname + '/runinfo_' + id + '.txt')
    os.system('cp SED.out ' + directoryname + '/SED_' + id + '.out.txt')
    if diffuse[j]:
        os.system('cp equivalentTau.out ' + directoryname +
                  '/equivalentTau_' + id + '.out.txt')
    else:
        os.system('cp tauNu.out ' + directoryname +
                  '/tauNu_' + id + '.out.txt')
    os.system('cp ' + base + 'input/input.in ' + directoryname +
              '/input_' + id + '.in.txt')
    os.system('cp ' + base + 'input/ndust/nDUST ' + directoryname +
              '/nDUST_' + id + '.in.txt')
def generate_reports(self, keywordsfilename, before, after):
    # Build an HTML/PDF/SPSS keyword report for the [after, before] date
    # window, then pack everything into a split zip archive.
    # Report basename encodes the window, e.g.
    # keywords_aftonbladet_idg_2015-01-01_2015-02-01
    reportbase = 'keywords_aftonbladet_idg_' + after.strftime(
        '%Y-%m-%d') + '_' + before.strftime('%Y-%m-%d')
    # NOTE(review): this file handle is never closed -- consider `with`.
    conf = open(keywordsfilename)
    keywords = json.load(conf)
    # Start from a clean slate: remove leftovers of any previous run
    try:
        shutil.rmtree(reportbase)
    except OSError:
        pass
    try:
        os.remove(reportbase + '.zip')
    except OSError as e:
        pass
    os.mkdir(reportbase)
    self._reportname = os.path.join(reportbase, reportbase)
    f = open(self._reportname + '.html', 'w')
    # Column order of the SPSS output
    self._rownames = [
        'idx',
        'fetched',
        'keywords',
        'publication',
        'date',
        'updated',
        'author',
        'author_email',
        'url',
        'title',
        'fulltext_plain',
    ]
    # SPSS variable types: 0 = numeric, N = string of N bytes
    spss_types = {
        'idx': 0,
        'fetched': 34,
        'keywords': 150,
        'publication': 30,
        'date': 34,
        'updated': 34,
        'author': 50,
        'author_email': 50,
        'url': 100,
        'title': 140,
        'fulltext_plain': 10000,
    }
    # The SPSS writer stays open while _generate_report runs, which
    # presumably writes rows through self._SPSSwriter as a side effect
    # -- TODO confirm against _generate_report.
    with savReaderWriter.SavWriter(
            self._reportname + '.sav',
            self._rownames,
            spss_types,
            ioUtf8=True,
    ) as self._SPSSwriter:
        f.write(self._generate_report(keywords, before, after))
    f.close()
    # Render the HTML report to PDF, copy the keyword config alongside it,
    # drop the intermediate HTML, and build a split (5 MB parts) zip.
    subprocess.call([
        'wkhtmltopdf', self._reportname + '.html', self._reportname + '.pdf'
    ])
    subprocess.call(['cp', keywordsfilename, reportbase])
    subprocess.call([
        'rm',
        self._reportname + '.html',
    ])
    subprocess.call([
        '7z', 'a', '-r', '-mx=9', '-v5m', reportbase + '.zip', reportbase
    ])
# NOTE(review): fragment -- `valLabel`, `data`, `valueLabelsData`,
# `varTypes`, `valueLabels`, `INSTANCE`, `FORMID` and `spss` are defined
# earlier in the file (valLabel presumably by an enclosing loop); verify
# indentation against the full source.
# Value-label columns hold "code: label" strings; keep only the numeric code.
data[valLabel] = data[valLabel].apply(lambda x: int(x.split(':')[0]))
nonLabels = []
# NOTE(review): list comprehension used for its side effect (appending) --
# a plain loop or filter would be clearer.
[
    nonLabels.append(y) if y not in valueLabelsData else False
    for y in list(data)
]
str_columns = data[nonLabels].select_dtypes(exclude=['float', 'int'])
int_columns = data[nonLabels].select_dtypes(include=['float', 'int'])
# numeric variables get SPSS type 0; string variables a fixed 5-byte width
for int_column in int_columns:
    varTypes.update({int_column.encode(): 0})
for str_column in str_columns:
    varTypes.update({str_column.encode(): 5})
    data[str_column] = data[str_column].apply(lambda x: x.encode())
records = data.to_dict('split')['data']
varNames = [y.encode() for y in list(data)]
# each variable is labelled with its own (byte) name
Labels = {}
for varName in varNames:
    Labels.update({varName: varName})
savFileName = INSTANCE + '_' + FORMID + '.sav'
with spss.SavWriter(savFileName, varNames, varTypes, valueLabels=valueLabels,
                    varLabels=Labels) as writer:
    for record in records:
        writer.writerow(record)
def graph(self, info: str = None, output=("plt", "excel")) -> str:
    """
    Graph the recorded statistics in a plt plot, in an excel spreadsheet
    or in an ssps compatible file.

    Args:
        output (Tuple[str]): the output formats to use
            ("plt", "excel" and/or "spss").
        info(str): Additional notes for the plt plot. If None is passed
            the function will ask via input so if you don't want info,
            pass an empty string.

    Returns:
        str: folder name for output
    """
    compatible_out = ["plt", "excel", "spss"]
    e = False
    for ro in output:
        if ro not in compatible_out:
            e = True
            print(
                "WARNING, output format {} is not supported, it will be skipped"
                .format(ro))
    if e:
        print("We currently support " + str(compatible_out))
    if info is None:
        info = input("Enter additional information about the sim: ")
    titles = [
        "Number Of Agents", "Average Agent Mass", "Amount of Food Consumed",
        "Average Agent IQ", "Average Agent EQ",
        "Average breeding mass divider", "Average Agent Breed Chance",
        "Fight count relative to population size",
        "Help count relative to population size",
        "Ignore count relative to population size", "Number of groups",
        "Close family ration in group"
    ]
    # NOTE(review): self.iq_OT appears twice while the titles list has both
    # "Average Agent IQ" and "Average Agent EQ" -- the second entry probably
    # should be self.eq_OT; confirm before changing, behavior kept as-is.
    values = [
        self.number_of_agents_OT, self.mass_OT, self.eat_OT, self.iq_OT,
        self.iq_OT, self.breed_mass_div_OT, self.breed_chance_OT,
        self.fight_OT, self.help_OT, self.nothing_OT,
        self.relative_groups_OT, self.close_family_in_group_OT
    ]
    extension = "png"
    fn = "graphs-0.3/" + self.get_fn()
    os.mkdir(fn)
    try:
        if "plt" in output:
            if len(titles) != len(values):
                raise Exception(
                    "Error len of titles must match len of vars")
            # one stacked subplot per tracked statistic
            fig, axs = plt.subplots(len(values),
                                    sharex='all',
                                    figsize=(20, 60))
            metadata = dict()
            for i in range(len(values)):
                axs[i].plot(self.i_OT, values[i], linewidth=0.25)
                axs[i].axes.set_ylim([0, max(values[i])])
                axs[i].set_ylabel(titles[i])
                metadata["Final" + titles[i]] = values[i][-1]
            axs[0].axes.set_xlim([0, self.dataPoints])
            axs[0].set_title(
                "Simulation with {} initial agents and {} steps\nDate: {}\nNotes: {}\n\nStats:\n{}\n"
                .format(len(self.agents), self.gcsteps,
                        time.strftime("%D"), info, self.stats()), )
            axs[-1].set_xlabel("Number Of Data Points")
            plt.tight_layout()
            plt.autoscale()
            pltfn = fn + "/plt." + extension
            fig.savefig(pltfn, bbox_inches='tight')  # save graph
            # add metadata (final value of each statistic) to the PNG
            im = Image.open(pltfn)
            meta = PngImagePlugin.PngInfo()
            for x in metadata:
                meta.add_text(x, str(metadata[x]))
            im.save(pltfn, extension, pnginfo=meta)
    # Bug fix: was a bare `except:` which also caught SystemExit /
    # KeyboardInterrupt; narrowed to Exception (plot failure stays non-fatal)
    except Exception:
        print("error in generating plt file")
    # rows = data points, columns = statistics
    transposed_data = []
    for i in range(self.dataPoints):
        transposed_data.append([j[i] for j in values])
    try:
        if "excel" in output:
            if len(values[0]) > 1048576:  # Excel's hard row limit
                print("to manny data points, skipping excel")
            else:
                wb = openpyxl.Workbook(write_only=True)
                sheet = wb.create_sheet()
                sheet.append(titles)
                for i in transposed_data:
                    sheet.append(i)
                wb.save(fn + "/excel.xlsx")
    # Bug fix: same bare-except narrowing as above
    except Exception:
        print("error in generating excel file")
    if "spss" in output:
        savFileName = fn + '/spss.sav'
        # SPSS variable names cannot contain spaces
        varNames = [i.replace(" ", "_") for i in titles]
        varTypes = dict()
        for t in varNames:
            varTypes[t] = 0  # all statistics are numeric
        with savReaderWriter.SavWriter(savFileName, varNames,
                                       varTypes) as writer:
            for i in range(self.dataPoints):
                writer.writerow(transposed_data[i])
    return os.getcwd() + "\\" + fn.replace("/", "\\")
# Round-trip test: read an SPSS file's data and metadata, write them back
# unmodified, and verify that the output matches the input.
ioLocale = "german" if is_windows else "de_DE.cp1252"
b_settings = dict(ioUtf8=sav.UNICODE_BMODE, ioLocale=ioLocale)

# read SPSS file data (rawMode keeps values unconverted)
with sav.SavReader(in_savFileName, rawMode=True, **b_settings) as data:
    in_records = data.all(False)

# read SPSS file metadata
with sav.SavHeaderReader(in_savFileName, **b_settings) as header:
    metadata = header.dataDictionary()
#pprint(metadata)

# write (unmodified) data to SPSS file; the reader's settings are merged
# into the metadata so the writer uses the same mode/locale
out_savFileName = os.path.join(tempfile.gettempdir(), 'out.sav')
metadata.update(b_settings)
with sav.SavWriter(out_savFileName, **metadata) as writer:
    writer.writerows(in_records)


# Now test whether input and output are the same
class Test_MetadataRoundTrip(unittest.TestCase):

    def setUp(self):
        # show full diffs on assertion failure
        self.maxDiff = None

    def test_data_same(self):
        # the rewritten file must be utf_8 encoded and contain the exact
        # records that were read in
        with sav.SavReader(out_savFileName, rawMode=True,
                           **b_settings) as data:
            out_records = data.all(False)
            out_encoding = data.fileEncoding
        self.assertEqual("utf_8", out_encoding)
        self.assertEqual(in_records, out_records)
def pseudonymise(input_file, columns=None, names=None, mapping_file=None, output_file=None):
    """
    Create UUID integer for certain columns of an SAV file.

    If no columns are selected (either by name '-n' or by number '-c') then
    automatically the first column is selected for pseudonymisation.

    \b
    :param input_file: points to input SAV file.
    :param columns: columns by number to pseudonymise, can be comma
        separated list (e.g. 0,5,7).
    :param names: columns by name to pseudonymise. Given as a comma
        separated list (e.g. 'PIDnumber,PIDnumberRelation,OtherPID')
    :param mapping_file: file with a previously created mapping. Reuses the
        pseudonymisation and adds mappings for new numbers.
    :param output_file: path to output file, defaults to same directory as
        input sav with suffix '-pseudonymised.sav'.
    """
    # NOTE(review): the '\b' marker and click.echo calls suggest this is a
    # click CLI command; the decorators are outside this view.
    import sys
    import os
    from uuid import uuid4
    # Trick to prevent savReaderWriter from complaining in stdout about
    # missing numpy, which it does not need.
    sys.stdout = open(os.devnull, 'w')
    import savReaderWriter
    sys.stdout = sys.__stdout__
    uuid_map = {}
    # Load a previous value -> uuid mapping so pseudonyms stay stable
    # across runs.
    if mapping_file:
        click.echo('Using uuid map: {}'.format(mapping_file))
        with open(mapping_file, 'r') as f:
            for line in f.readlines():
                if not line:
                    continue
                fis, uuid = line.strip().split('\t')
                uuid_map[fis] = uuid
    if output_file is None:
        output_file = input_file.rsplit('.', 1)[0] + '-pseudonymised.sav'
    output_map = input_file.rsplit('.', 1)[0] + '-mapping.tsv'
    with savReaderWriter.SavReader(input_file) as reader:
        # Resolve the selected columns to integer indices.
        int_columns = []
        header = [c.decode() for c in reader.getHeader(None)]
        if columns is not None:
            int_columns += [int(c) for c in columns.split(',') if c != '']
        if names is not None:
            for name in names.split(','):
                try:
                    idx = header.index(name)
                except ValueError:
                    raise SystemExit(
                        "Column {} not found in file. Aborting.".format(name))
                int_columns.append(idx)
        if not int_columns:
            int_columns = [0]  # default: pseudonymise the first column
        click.echo("Pseudonymising columns: {!r}".format(
            [header[c] for c in int_columns]))
        # Copy every piece of metadata from the input so the output file is
        # identical except for the pseudonymised cells.
        with savReaderWriter.SavWriter(
                savFileName=output_file,
                varNames=reader.varNames,
                varTypes=reader.varTypes,
                valueLabels=reader.valueLabels,
                varLabels=reader.varLabels,
                formats=reader.formats,
                missingValues=reader.missingValues,
                measureLevels=reader.measureLevels,
                columnWidths=reader.columnWidths,
                alignments=reader.alignments,
                varSets=reader.varSets,
                varRoles=reader.varRoles,
                varAttributes=reader.varAttributes,
                fileAttributes=reader.fileAttributes,
                fileLabel=reader.fileLabel,
                multRespDefs=reader.multRespDefs,
                caseWeightVar=reader.caseWeightVar) as writer:
            for record in list(reader):
                for n in int_columns:
                    # Safer to convert to string, as this is how the map
                    # would be read from file if it is used another time.
                    fis = str(record[n])
                    if fis not in uuid_map:
                        uuid_map[fis] = uuid4().int
                    record[n] = uuid_map[fis]
                writer.writerow(record)
    click.echo('Writing pseudonymised sav: {}'.format(output_file))
    # write uuid map to disk
    with open(output_map, 'w') as f:
        click.echo('Writing mapping file: {}'.format(output_map))
        for fis, uuid in uuid_map.items():
            f.write('{}\t{}\n'.format(fis, uuid))
    click.echo('Finished. Good bye.')
# NOTE(review): fragment -- `odf`, `varTypes`, `pathSav`, `valLabs`,
# `records`, `timestart` are defined earlier in the file; the trailing
# print( continues beyond this chunk.
# Map pandas dtypes to SPSS types: object columns become 1024-byte strings,
# everything else (incl. dates) numeric.
for col in odf.columns:
    if str(odf[col].dtype) == 'object':
        varTypes[col] = 1024
    elif 'date' in str(odf[col].dtype):
        varTypes[col] = 0
    else:
        varTypes[col] = 0
varTypes['date'] = 0
logging.debug(f"varTypes: {varTypes}")
colsSave = list(odf.columns)
# https://pythonhosted.org/savReaderWriter/generated_api_documentation.html#savwriter
with srw.SavWriter(pathSav, varNames=colsSave, varTypes=varTypes,
                   valueLabels=valLabs, ioUtf8=True,
                   formats={'date': 'DATETIME17'}) as writer:
    for record in records:
        # first field is presumably the 'date' column -- converted to an
        # SPSS datetime value before writing (TODO confirm column order)
        record[0] = writer.spssDateTime(record[0].encode(),
                                        '%Y-%m-%d %H:%M:%S')
        writer.writerow(record)
logging.info(f"Save {os.path.abspath(pathSav)}")
timeend = dt.datetime.now()
logging.info(
    f"\nstart time: {timestart}\nfinish time: {timeend}\nduration: {(timeend - timestart)}"
)
print(
# for i, line in enumerate(reader): for i in range(0, 2): newline = [] newline.append(reader[i][2]) # username newline.append(reader[i][12]) # posttext newline.append(reader[i][17]) # posttextpolarity newline.append(reader[i][18]) # posttextsubjectivity newline.append(reader[i][66]) # clustername newrow.append(newline) # print newline varNames = ['UserName', 'PostText', 'PostTextPolarity', 'PostTextSubjectivity', 'ClusterName'] varTypes = {'UserName': 1, 'PostText': 1, 'PostTextPolarity': 0, 'PostTextSubjectivity': 0, 'ClusterName': 1} # varTypes = {'UserName': 5, 'v2': 0, 'v3': 0} with savReaderWriter.SavWriter(filenametowrite, varNames, varTypes) as writer : for x in newrow: writer.writerows(x) """ """ """ # username -> [2] # posttext -> [12] # polarity -> [17] # subjectivity -> [18] # clustername -> [66] newline.append(line[2]) newline.append(line[12]) newline.append(line[17])
#!/usr/bin/python # -*- coding: utf-8 -*- import sys #sys.setdefaultencoding("utf-8") #reload(sys) import savReaderWriter import locale import os import collections from obdc import * varLabels = {'var1': 'This is variable 1', 'v2': 'This is v2!', 'bdate': 'dob'} file = "Agropecuario.sav" #file="Hogares.sav" preguntas, dicpreguntas, vartypes, varlabels, medicion, valuelabels = metadata( "Agropecuario-ccc.mdb") with savReaderWriter.SavWriter(file, preguntas, vartypes, valuelabels, varlabels, formats=None, missingValues=None, measureLevels=medicion, ioLocale='Spanish_Spain.1252') as sav: pass
def test_writerows_erroneous_flat_n():
    """A flat (non-nested) numeric sequence must raise TypeError."""
    records = [0, 1]  # wrong!
    savFileName = "output_error1.sav"
    with srw.SavWriter(savFileName, *args) as writer:
        assert_raises(TypeError, writer.writerows, records)
def test_writerows_erroneous_flat_empty():
    """An empty records sequence must raise ValueError."""
    records = []  # wrong!
    string_args = ["v1", "v2"], dict(v1=1, v2=1)
    savFileName = "output_error3.sav"
    with srw.SavWriter(savFileName, *string_args) as writer:
        assert_raises(ValueError, writer.writerows, records)
i.encode('UTF-8') ) #every row/column data converted to bytes which is allow us to insert on spss file and append on inner array elif i == None: #i replace a null value to empity string and append it on the array myinner.append(b'') #print('i got none') else: myinner.append(i) #print(myinner) myouter.append( myinner ) #after every inner array preparation i append it on outer array #-->19-nov-2018 print(myouter) YonisavFileName = 'TestSpss.sav' #define on which spss data to be inserted with savReaderWriter.SavWriter( YonisavFileName, mode=b'ab', *metadata ) as writer: #open spss data for insert by append style "b'ab'" work the append for record in myouter: #do itteration for every row #-->19-nov-2018 print(record[2])#This #-->19-nov-2018 print(record[25]) #Date formating start here---------------------- #The following 4 lines deal with date time conversion #i have two spss variable which has date data type for those variable i need to have date formating #i manually get a date variable array index which is 2 and 25 #i create a date value using spssDateTime functon and put value on the record array spssDateValue = writer.spssDateTime(record[2], '%d-%m-%Y') record[2] = spssDateValue spssDateValue = writer.spssDateTime(record[25], '%d-%m-%Y') record[25] = spssDateValue #Date formating end here------------------------------
# NOTE(review): fragment -- `data` (an open SavReader), `records`,
# `spss_file`, `spss_file_out`, `ioLocale` are defined earlier in the file.
# Build a NumPy structured dtype from the SPSS variable types:
# width-N strings become "SN", numerics (type 0) become float64.
formats = [
    "S%d" % data.varTypes[v] if data.varTypes[v] else np.float64
    for v in data.varNames
]
dtype = np.dtype({'names': data.varNames, 'formats': formats})
structured_array = np.array([tuple(record) for record in records],
                            dtype=dtype)
allDataArray = np.array(
    records
)  # in the most recent version one can directly read to numpy arrays
print(records)

# reading metadata from SPSS file
with sav.SavHeaderReader(spss_file, ioUtf8=True,
                         ioLocale=ioLocale) as header:
    metadata = header.dataDictionary(
        asNamedtuple=False)  # Why does this take so long?
pprint.pprint(metadata)

# writing unmodified data (metadata is splatted straight into the writer)
with sav.SavWriter(spss_file_out, overwrite=True, ioUtf8=True,
                   ioLocale=ioLocale, mode=b'wb', refSavFileName=None,
                   **metadata) as writer:
    for i, record in enumerate(structured_array):
        writer.writerow(record)