def _example():
    savFileName = 'someFile.sav'
    records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
    varNames = ['var1', 'v2', 'v3']
    varTypes = {'var1': 5, 'v2': 0, 'v3': 0}
    with SavWriter(savFileName, varNames, varTypes) as writer:
        for record in records:
            writer.writerow(record)
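For comparison, a minimal round-trip sketch (not part of the example above) that reads the written file back with the companion SavReader class; it assumes the 'someFile.sav' produced by _example() exists and that savReaderWriter is importable.

from savReaderWriter import SavReader

# Round-trip check (sketch): in varTypes, 0 marks a numeric variable and a
# positive value is the byte width of a string variable, so 'var1' holds
# strings of up to 5 bytes while 'v2' and 'v3' come back as floats.
with SavReader('someFile.sav') as reader:
    for record in reader:
        print(record)  # e.g. [b'Test1', 1.0, 1.0]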
def single_process_run(chunked_data_iterator):
    """Process and write chunks serially."""
    with SavWriter(args.outfile, save_info['column_names'], save_info['types'],
                   measureLevels=save_info['measure_levels'],
                   alignments=save_info['alignments'],
                   columnWidths=save_info['column_widths'],
                   formats=save_info['formats']) as writer:
        for i, chunk in enumerate(chunked_data_iterator):
            chunk = (i, chunk)
            writer.writerows(process_chunk(chunk)[1])
def test_sav_special_char_columns(self):
    survey = create_survey_from_xls(
        _logger_fixture_path('grains/grains.xls'))
    export_builder = ExportBuilder()
    export_builder.TRUNCATE_GROUP_TITLE = True
    export_builder.set_survey(survey)
    export_builder.INCLUDE_LABELS = True
    export_builder.set_survey(survey)
    for sec in export_builder.sections:
        sav_options = export_builder._get_sav_options(sec['elements'])
        sav_file = NamedTemporaryFile(suffix=".sav")
        # No exception is raised
        SavWriter(sav_file.name, **sav_options)
def test_date_conversion(self):
    with tempfile.NamedTemporaryFile(suffix='.sav') as tmpfile:
        with SavWriter(tmpfile.name, ['date'], {b'date': 0},
                       formats={b'date': b'EDATE40'},
                       ioUtf8=True, ioLocale="C.UTF-8") as writer:
            record = [writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")]
            writer.writerow(record)
        with SavReader(tmpfile.name, returnHeader=False,
                       ioUtf8=True, ioLocale="C.UTF-8") as reader:
            date = list(reader)[0][0]
            self.assertEqual('2000-01-01', date)
def multi_process_run(chunked_data_iterator):
    """Process chunks in parallel."""
    with SavWriter(args.outfile, save_info['column_names'], save_info['types'],
                   measureLevels=save_info['measure_levels'],
                   alignments=save_info['alignments'],
                   columnWidths=save_info['column_widths'],
                   formats=save_info['formats']) as writer:
        pool = mp.Pool(mp.cpu_count() - 1)
        for i, df in pool.imap_unordered(process_chunk,
                                         enumerate(chunked_data_iterator), 1):
            print('Writing chunk {}.'.format(i))
            writer.writerows(df)
        pool.close()
        pool.join()
def get_body(header):
    headers = config['authheader']
    headers.update({'Content-type': 'application/json'})
    r = requests.get(
        config['dburl'] +
        '/_design/doctype/_view/doctype?startkey="data"&endkey="data"',
        headers=headers
    )
    ids = []
    for row in yaml.safe_load(r.text)['rows']:
        ids.append(row['id'])
    blocknumber = 0
    blocksize = 200
    with SavWriter(config['outputfile'], header['varNames'], header['varTypes'],
                   valueLabels=header['valueLabels'],
                   varSets=header['varSets'],
                   varAttributes=header['varAttributes'],
                   varRoles=header['varRoles'],
                   measureLevels=header['measureLevels'],
                   caseWeightVar=header['caseWeightVar'],
                   varLabels=header['varLabels'],
                   formats=header['formats'],
                   multRespDefs=header['multRespDefs'],
                   columnWidths=header['columnWidths'],
                   fileAttributes=header['fileAttributes'],
                   alignments=header['alignments'],
                   fileLabel=header['fileLabel'],
                   missingValues=header['missingValues']) as writer:
        while blocknumber * blocksize < len(ids):
            document = dict(
                keys=ids[blocknumber * blocksize:(blocknumber + 1) * blocksize])
            blocknumber += 1
            r = requests.post(
                config['dburl'] + '/_all_docs?include_docs=true',
                headers=headers,
                data=json.dumps(document)
            )
            for row in yaml.safe_load(r.text)['rows']:
                orderedrow = []
                for varName in header['varNames']:
                    orderedrow.append(row['doc'][varName])
                writer.writerow(orderedrow)
def test_sav_duplicate_columns(self):
    more_than_64_char = "akjasdlsakjdkjsadlsakjgdlsagdgdgdsajdgkjdsdgsj" \
                        "adsasdasgdsahdsahdsadgsdf"
    md = """
    | survey |
    |        | type           | name  | label | choice_filter |
    |        | select one fts | fruit | Fruit | active=1      |
    |        | integer        | age   | Age   |               |
    |        | integer        | {}    | Resp2 |               |
    |        | begin group    | {}    | Resp  |               |
    |        | integer        | age   | Resp  |               |
    |        | text           | name  | Name  |               |
    |        | begin group    | {}    | Resp2 |               |
    |        | integer        | age   | Resp2 |               |
    |        | integer        | {}    | Resp2 |               |
    |        | end group      |       |       |               |
    |        | end group      |       |       |               |

    | choices |
    |         | list name | name   | label  | active |
    |         | fts       | orange | Orange | 1      |
    |         | fts       | mango  | Mango  | 1      |
    """
    md = md.format(more_than_64_char, more_than_64_char,
                   more_than_64_char, more_than_64_char)
    survey = self.md_to_pyxform_survey(md)
    export_builder = ExportBuilder()
    export_builder.TRUNCATE_GROUP_TITLE = True
    export_builder.set_survey(survey)
    export_builder.INCLUDE_LABELS = True
    export_builder.set_survey(survey)
    for sec in export_builder.sections:
        sav_options = export_builder._get_sav_options(sec['elements'])
        sav_file = NamedTemporaryFile(suffix=".sav")
        # No exception is raised
        SavWriter(sav_file.name, **sav_options)
def to_zipped_sav(self, path, data, *args):
    def write_row(row, csv_writer, fields):
        sav_writer.writerow(
            [encode_if_str(row, field, True) for field in fields])

    sav_defs = {}

    # write headers
    for section in self.sections:
        fields = [element['title'] for element in section['elements']] \
            + self.EXTRA_FIELDS
        c = 0
        var_labels = {}
        var_names = []
        tmp_k = {}
        for field in fields:
            c += 1
            var_name = 'var%d' % c
            var_labels[var_name] = field
            var_names.append(var_name)
            tmp_k[field] = var_name

        var_types = dict(
            [(tmp_k[element['title']],
              0 if element['type'] in ['decimal', 'int'] else 255)
             for element in section['elements']] +
            [(tmp_k[item],
              0 if item in ['_id', '_index', '_parent_index'] else 255)
             for item in self.EXTRA_FIELDS])
        sav_file = NamedTemporaryFile(suffix=".sav")
        sav_writer = SavWriter(sav_file.name, varNames=var_names,
                               varTypes=var_types,
                               varLabels=var_labels, ioUtf8=True)
        sav_defs[section['name']] = {
            'sav_file': sav_file, 'sav_writer': sav_writer}

    index = 1
    indices = {}
    survey_name = self.survey.name
    for d in data:
        # decode mongo section names
        joined_export = dict_to_joined_export(d, index, indices, survey_name)
        output = ExportBuilder.decode_mongo_encoded_section_names(
            joined_export)
        # attach meta fields (index, parent_index, parent_table)
        # output has keys for every section
        if survey_name not in output:
            output[survey_name] = {}
        output[survey_name][INDEX] = index
        output[survey_name][PARENT_INDEX] = -1
        for section in self.sections:
            # get data for this section and write to csv
            section_name = section['name']
            sav_def = sav_defs[section_name]
            fields = [element['xpath'] for element in
                      section['elements']] + self.EXTRA_FIELDS
            sav_writer = sav_def['sav_writer']
            row = output.get(section_name, None)
            if type(row) == dict:
                write_row(self.pre_process_row(row, section),
                          sav_writer, fields)
            elif type(row) == list:
                for child_row in row:
                    write_row(self.pre_process_row(child_row, section),
                              sav_writer, fields)
        index += 1

    for section_name, sav_def in sav_defs.iteritems():
        sav_def['sav_writer'].closeSavFile(
            sav_def['sav_writer'].fh, mode='wb')

    # write zipfile
    with ZipFile(path, 'w') as zip_file:
        for section_name, sav_def in sav_defs.iteritems():
            sav_file = sav_def['sav_file']
            sav_file.seek(0)
            zip_file.write(sav_file.name,
                           "_".join(section_name.split("/")) + ".sav")

    # close files when we are done
    for section_name, sav_def in sav_defs.iteritems():
        sav_def['sav_file'].close()
def to_zipped_sav(self, path, data, *args, **kwargs):
    total_records = kwargs.get('total_records')

    def write_row(row, csv_writer, fields):
        sav_writer.writerow(
            [encode_if_str(row, field, True) for field in fields])

    sav_defs = {}

    # write headers
    for section in self.sections:
        sav_options = self._get_sav_options(section['elements'])
        sav_file = NamedTemporaryFile(suffix=".sav")
        sav_writer = SavWriter(sav_file.name, ioLocale="en_US.UTF-8",
                               **sav_options)
        sav_defs[section['name']] = {
            'sav_file': sav_file, 'sav_writer': sav_writer}

    media_xpaths = [] if not self.INCLUDE_IMAGES \
        else self.dd.get_media_survey_xpaths()

    index = 1
    indices = {}
    survey_name = self.survey.name
    for i, d in enumerate(data, start=1):
        # decode mongo section names
        joined_export = dict_to_joined_export(d, index, indices, survey_name,
                                              self.survey, d, media_xpaths)
        output = ExportBuilder.decode_mongo_encoded_section_names(
            joined_export)
        # attach meta fields (index, parent_index, parent_table)
        # output has keys for every section
        if survey_name not in output:
            output[survey_name] = {}
        output[survey_name][INDEX] = index
        output[survey_name][PARENT_INDEX] = -1
        for section in self.sections:
            # get data for this section and write to csv
            section_name = section['name']
            sav_def = sav_defs[section_name]
            fields = [element['xpath'] for element in section['elements']]
            sav_writer = sav_def['sav_writer']
            row = output.get(section_name, None)
            if type(row) == dict:
                write_row(self.pre_process_row(row, section),
                          sav_writer, fields)
            elif type(row) == list:
                for child_row in row:
                    write_row(self.pre_process_row(child_row, section),
                              sav_writer, fields)
        index += 1
        track_task_progress(i, total_records)

    for section_name, sav_def in sav_defs.iteritems():
        sav_def['sav_writer'].closeSavFile(
            sav_def['sav_writer'].fh, mode='wb')

    # write zipfile
    with ZipFile(path, 'w', ZIP_DEFLATED, allowZip64=True) as zip_file:
        for section_name, sav_def in sav_defs.iteritems():
            sav_file = sav_def['sav_file']
            sav_file.seek(0)
            zip_file.write(sav_file.name,
                           "_".join(section_name.split("/")) + ".sav")

    # close files when we are done
    for section_name, sav_def in sav_defs.iteritems():
        sav_def['sav_file'].close()
def write_to_SPSS(savFileName, records, varNames, varTypes):
    with SavWriter(savFileName, varNames, varTypes) as writer:
        for record in records:
            writer.writerow(record)
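A hypothetical call to this helper; the file name and data are illustrative, reusing the shapes from the first example rather than anything in the original snippet.

write_to_SPSS('output.sav',
              records=[[b'Test1', 1, 1], [b'Test2', 2, 1]],
              varNames=['var1', 'v2', 'v3'],
              varTypes={'var1': 5, 'v2': 0, 'v3': 0})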
# (fragment: continues from a loop that builds per-variable metadata such as
#  valueLabels and formats before the file is written)
        res2 = i["valuelabels"]
        if res2 == "0":
            valueLabels[name] = {}
        else:
            res3 = pickle.loads(res2)
            valueLabels[name] = res3
    else:
        valueLabels[name] = {}
    formats[name] = i["formats"]

savFileName = '/opt/someFile.sav'
with SavWriter(savFileName=savFileName, varNames=varNames, varTypes=varTypes,
               formats=formats, varLabels=varLabels, valueLabels=valueLabels,
               ioUtf8=True, columnWidths={}) as writer:
    for row_data in query_data:
        sub_li = []
        for i in range(len(my_columns_types)):
            sub_data = row_data[varNames[i]]
            if my_columns_types[i] == "VARCHAR":
                sub_li.append(json.loads(sub_data))
            elif my_columns_types[i] == "DATETIME":
                sub_li.append(writer.spssDateTime(b'%s' % sub_data,
                                                  '%Y-%m-%d %H:%M:%S'))
            elif my_columns_types[i] == "DATE":
                sub_li.append(writer.spssDateTime(b'%s' % sub_data,
                                                  '%Y-%m-%d'))
            else:
                sub_li.append(sub_data)
        data.append(sub_li)
def create_sav_writer(filename, settings):
    from savReaderWriter import SavWriter
    return SavWriter(filename, ioUtf8=True, **settings)
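A sketch of how this factory might be used; the settings keys mirror SavWriter keyword arguments, and the variable names, labels, and file name are assumptions for illustration. The explicit closeSavFile call follows the pattern used in the to_zipped_sav examples above, since no context manager is involved here.

# Illustrative settings (not from the original code): any SavWriter keyword
# argument such as varLabels, formats, or valueLabels can be passed this way.
settings = {
    'varNames': ['id', 'score'],
    'varTypes': {'id': 0, 'score': 0},
    'varLabels': {'id': 'Respondent ID', 'score': 'Test score'},
}
writer = create_sav_writer('results.sav', settings)
try:
    writer.writerow([1, 42.5])
finally:
    # Close the underlying SPSS file handle explicitly, as in the
    # to_zipped_sav examples, because no with-block does it for us.
    writer.closeSavFile(writer.fh, mode='wb')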
def genreate_spss(self):
    self.adjust_data()
    mdt = my_datetime()
    nowtime = datetime.datetime.now().strftime("%Y%m%d")
    new_time1 = "%.6f" % float(time.time())
    new_time3 = new_time1.split(".")[0] + new_time1.split(".")[1]
    filename = "u" + str(self.user_id) + "_" + str(nowtime) + "_" + str(new_time3)
    filepath = Config().get_content("filepath")["download_path"]
    if filepath:
        user_file_path = os.path.join(filepath, str(self.user_id))
        time_now = datetime.datetime.now().strftime("%Y-%m-%d")
        user_subfilepath = os.path.join(user_file_path, time_now)
        if not os.path.exists(user_file_path):
            os.makedirs(user_file_path)
        if not os.path.exists(user_subfilepath):
            os.makedirs(user_subfilepath)
    else:
        filepath = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), "download")
        user_file_path = os.path.join(filepath, str(self.user_id))
        time_now = datetime.datetime.now().strftime("%Y-%m-%d")
        user_subfilepath = os.path.join(user_file_path, time_now)
        if not os.path.exists(user_file_path):
            os.makedirs(user_file_path)
        if not os.path.exists(user_subfilepath):
            os.makedirs(user_subfilepath)
    savFileName = os.path.join(user_subfilepath, filename + ".sav")
    print(self.varLabels)
    with SavWriter(savFileName=savFileName, varNames=self.varNames,
                   varTypes=self.varTypes, formats=self.formats,
                   varLabels=self.varLabels, valueLabels=self.valueLabels,
                   ioUtf8=True, columnWidths={}) as writer:
        for row_data in self.my_data:
            sub_li = []
            for i in range(len(self.my_columns_types)):
                sub_data = row_data[self.varNames[i]]
                if self.my_columns_types[i] == "VARCHAR":
                    sub_li.append(sub_data)
                elif self.my_columns_types[i] == "DATETIME":
                    aaa = mdt.become_str(sub_data)
                    sub_li.append(
                        writer.spssDateTime(bytes(aaa, 'utf-8'),
                                            '%Y-%m-%d %H:%M:%S'))
                elif self.my_columns_types[i] == "DATE":
                    sub_li.append(
                        writer.spssDateTime('%s' % sub_data, '%Y-%m-%d'))
                else:
                    sub_li.append(sub_data)
            self.data.append(sub_li)
        writer.writerows(self.data)
    return savFileName
def test_date_encoding(self):
    with SavWriter(self.savFileName, [b'date'], {b'date': 0}) as writer:
        # spssDateTime accepts both bytes and str date strings and returns
        # the SPSS internal value: seconds since October 14, 1582.
        seconds1 = writer.spssDateTime(b"2000-01-01", "%Y-%m-%d")
        seconds2 = writer.spssDateTime("2000-01-01", "%Y-%m-%d")
        self.assertEqual(seconds1, 13166064000.0)
        self.assertEqual(seconds1, seconds2)
# column_names and column_specifications are used by pandas.read_fwf to read the Medi-Cal file.
with open(config.county_load_info) as fp:
    column_names, column_specifications = zip(*json.load(fp))

# All columns should be brought in as strings.
converters = {name: str for name in column_names}

df = pd.read_fwf(medical_file,
                 colspecs=column_specifications,
                 header=None,
                 names=column_names,
                 converters=converters)

df = common.drop_summary_row(df)
df = common.drop_cinless_rows(df)

with open(config.county_save_info) as fp:
    save_info = json.load(fp)

with SavWriter(save_file_name,
               save_info['column_names'],
               save_info['types'],
               measureLevels=save_info['measure_levels'],
               alignments=save_info['alignments'],
               columnWidths=save_info['column_widths']) as writer:
    writer.writerows(df[save_info['column_names']].values)

print('Program finished in: {}.'.format(
    str(datetime.now() - program_start_time)))
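The script above is driven entirely by two JSON config files. A hypothetical shape for the one behind config.county_save_info is sketched below; the keys follow the lookups in the code, while the column names and values are illustrative assumptions, not taken from the original project.

# Hypothetical contents of the file referenced by config.county_save_info.
# 'types' follows SavWriter's convention: 0 = numeric, N = string of N bytes.
save_info = {
    "column_names": ["CIN", "AID_CODE"],
    "types": {"CIN": 9, "AID_CODE": 2},
    "measure_levels": {"CIN": "nominal", "AID_CODE": "nominal"},
    "alignments": {"CIN": "left", "AID_CODE": "left"},
    "column_widths": {"CIN": 10, "AID_CODE": 4},
}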