def decode_file(self, source_filename, result_filename):
    """Decodes given file.

    Reads the source CSV file (';' as delimiter, '"' as quote character),
    replaces the codes stored in fields 1-3 of each row with names obtained
    from the decoders, puts the unit type in field 4, cleans the row and
    saves header and rows to the result file.

    Whenever a name lookup returns None, the 1-based number of the
    processed row is printed so that the row can be found in the source.

    Arguments:
    source_filename -- name of file to decode
    result_filename -- name of file to save decoded data
    """
    def lookup_name(decoder, code, row_number):
        # Returns the decoder's name for code; prints the row number
        # when the decoder has no name for it.
        name = decoder.get_name(code)
        if name is None:
            print(row_number)
        return name

    csv_file = CsvFile(source_filename, delim=';', quote='"')
    # try/finally guarantees the source file is closed even if decoding
    # one of the rows raises.
    try:
        csv_data = CsvData(csv_file)
        new_header = self.decode_header(csv_data.get_header())
        new_rows = []

        row_number = 0
        row = csv_data.get_next_row(row_type='list')
        while row:
            row_number += 1
            changed_row = row[:]
            is_jst = row[4] in ('z', 'Z')

            # Codes of deeper levels are built by concatenating fields.
            code_1 = row[1]
            code_2 = row[1] + row[2]

            changed_row[1] = lookup_name(self.teryt_decoder, code_1,
                                         row_number)
            if is_jst:
                changed_row[2] = lookup_name(self.teryt_decoder, code_2,
                                             row_number)
                # decoder has xyz, file has 0xyz
                changed_row[3] = lookup_name(self.jst_decoder, row[6][1:],
                                             row_number)
                changed_row[4] = u'Związek JST'
            else:
                # The type comes from the deepest level whose code
                # is not '00'.
                unit_type = self.teryt_decoder.get_type(code_1)
                if row[2] != '00':
                    changed_row[2] = lookup_name(self.teryt_decoder, code_2,
                                                 row_number)
                    unit_type = self.teryt_decoder.get_type(code_2)
                else:
                    changed_row[2] = ''
                if row[3] != '00':
                    code_3 = code_2 + row[3]
                    changed_row[3] = lookup_name(self.teryt_decoder, code_3,
                                                 row_number)
                    unit_type = self.teryt_decoder.get_type(code_3)
                else:
                    changed_row[3] = ''
                changed_row[4] = unit_type

            self.clean_row(changed_row)
            new_rows.append(changed_row)
            row = csv_data.get_next_row(row_type='list')
    finally:
        csv_file.close()

    new_data = Data([new_header] + new_rows, result_filename)
    new_data.save()
def insert_hierarchy(csv_file, json_hierarchy, schema_descr, teryt_data=None):
    """Inserts hierarchy into csv_file using hierarchy schema.

    If any row has incorrect hierarchy, the error log is written to the
    file named by consts['hierarchy_errors_name'] and the program exits.
    Otherwise the modified rows are saved to a new file whose name is the
    name of csv_file with the '.csv' suffix replaced by '_upload.csv'
    (the suffix is simply appended if the name does not end with '.csv').
    The program also exits if that file can not be saved.

    Arguments:
    csv_file -- CsvFile object representing data
    json_hierarchy -- hierarchy schema
    schema_descr -- schema describing fields in collection
    teryt_data -- data with TERYT codes, if None, then rows' id will
                  be not connected with TERYT codes

    Returns name of the file with saved data.
    """
    print('Trying to clean hierarchy in data')
    csv_file.reset()
    csv_data = CsvData(csv_file)
    hierarchy_cleaner = HierarchyInserter(csv_data, json_hierarchy,
                                          schema_descr, add_id=True,
                                          teryt_data=teryt_data)
    hierarchy_cleaner.insert_hierarchy()

    if hierarchy_cleaner.all_rows_correct():
        print('All rows have correct hierarchy')
    else:
        error_file_name = consts['hierarchy_errors_name']
        print('Some errors in hierarchy have been found:')
        print('Saving them to file %s' % error_file_name)
        try:
            error_file = open(error_file_name, 'wb')
        except IOError:
            print('Can not open file %s, no information will be saved.'
                  % error_file_name)
        else:
            # Close the log even if writing it fails.
            try:
                error_file.write(hierarchy_cleaner.get_hierarchy_errors_log())
            finally:
                error_file.close()
        exit()

    clean_data = hierarchy_cleaner.get_modified_rows()
    print('Successfully cleaned hierarchy')

    csv_name = csv_file.get_filename()
    # str.rstrip('.csv') would strip every trailing '.', 'c', 's' or 'v'
    # character (e.g. 'docs.csv' -> 'do'), so remove the suffix explicitly.
    suffix = '.csv'
    if csv_name.endswith(suffix):
        base_name = csv_name[:-len(suffix)]
    else:
        base_name = csv_name
    new_csv_name = base_name + '_upload.csv'

    print('Trying to save data with cleaned hierarchy in %s' % new_csv_name)
    try:
        data_file = Data(clean_data, new_csv_name)
        data_file.save()
    except IOError:
        exit("Error: can't open file %s. Exiting now." % new_csv_name)
    else:
        print('Successfully saved data in %s' % new_csv_name)

    return new_csv_name
def change_file(self, fname=None):
    """Transforms file to new format and saves it.

    The transformed header comes first, followed by every row of
    self.data passed through change_row. Data is saved with quoting
    turned off (csv.QUOTE_NONE).

    Arguments:
    fname -- name of transformed file, if None, then generated from
             input file's name: self.name without its last four
             characters, followed by '_mod.csv'
    """
    output_rows = [self.change_header(self.data.get_header())]

    # Reading stops at the first falsy value returned by get_next_row.
    while True:
        current = self.data.get_next_row(row_type='list')
        if not current:
            break
        output_rows.append(self.change_row(current))

    target_name = fname
    if target_name is None:
        target_name = self.name[:-4] + '_mod.csv'

    Data(output_rows, target_name).save(quoting=csv.QUOTE_NONE)