Example #1
0
 def decode_file(self, source_filename, result_filename):
     """Decode the coded columns of a CSV file and save the result.

     The source file is read as a ';'-delimited, '"'-quoted CSV file. Its
     header goes through self.decode_header and every row gets columns 1-4
     replaced as follows:

     * column 1 -- name the TERYT decoder returns for the code in column 1;
     * rows whose column 4 is 'z' or 'Z' -- column 2 becomes the name for
       the code built from columns 1+2, column 3 becomes the name the JST
       decoder returns for column 6 without its first character, and
       column 4 becomes u'Związek JST';
     * all other rows -- columns 2 and 3 become the names for the codes
       built from columns 1+2 and 1+2+3 (or '' when the column holds
       '00'), and column 4 becomes the type of the last code looked up.

     Each changed row is passed to self.clean_row before being collected.
     Whenever a name lookup returns None, the running number of the row
     (counted from 1 over the rows read with get_next_row) is printed.

     Arguments:
     source_filename -- name of file to decode
     result_filename -- name of file to save decoded data
     """
     def name_or_report(decoder, code, row_number):
         # Look the code up; print the row number when nothing is found so
         # that undecodable rows can be located in the source file.
         name = decoder.get_name(code)
         if name is None:
             print(row_number)
         return name

     csv_file = CsvFile(source_filename, delim=';', quote='"')
     try:
         csv_data = CsvData(csv_file)
         new_header = self.decode_header(csv_data.get_header())

         new_rows = []
         row_number = 0
         row = csv_data.get_next_row(row_type='list')
         while row:
             row_number += 1
             changed_row = row[:]
             is_jst = row[4] in ('z', 'Z')

             changed_row[1] = name_or_report(self.teryt_decoder, row[1],
                                             row_number)
             if is_jst:
                 changed_row[2] = name_or_report(self.teryt_decoder,
                                                 row[1] + row[2], row_number)
                 # decoder has xyz, file has 0xyz
                 changed_row[3] = name_or_report(self.jst_decoder,
                                                 row[6][1:], row_number)
                 changed_row[4] = u'Związek JST'
             else:
                 # Start with the type of the column 1 code and override it
                 # with the type of each longer code that is present.
                 unit_type = self.teryt_decoder.get_type(row[1])
                 if row[2] != '00':
                     code = row[1] + row[2]
                     changed_row[2] = name_or_report(self.teryt_decoder,
                                                     code, row_number)
                     unit_type = self.teryt_decoder.get_type(code)
                 else:
                     changed_row[2] = ''

                 if row[3] != '00':
                     code = row[1] + row[2] + row[3]
                     changed_row[3] = name_or_report(self.teryt_decoder,
                                                     code, row_number)
                     unit_type = self.teryt_decoder.get_type(code)
                 else:
                     changed_row[3] = ''
                 changed_row[4] = unit_type

             self.clean_row(changed_row)
             new_rows.append(changed_row)
             row = csv_data.get_next_row(row_type='list')
     finally:
         # Release the source file even when a lookup or row access fails.
         csv_file.close()

     new_data = Data([new_header] + new_rows, result_filename)
     new_data.save()
Example #2
0
def insert_hierarchy(csv_file, json_hierarchy, schema_descr, teryt_data=None):
    """Inserts hierarchy into csv_file using hierarchy schema.

    The file is rewound, wrapped in CsvData and processed by
    HierarchyInserter (with add_id=True). If any row has an incorrect
    hierarchy, the error log is written to the file named by
    consts['hierarchy_errors_name'] and the program exits. Otherwise the
    modified rows are saved next to the input as '<name>_upload.csv', where
    <name> is the input file name without a trailing '.csv'.

    Arguments:
    csv_file -- CsvFile object representing data
    json_hierarchy -- hierarchy schema
    schema_descr -- schema describing fields in collection
    teryt_data -- data with TERYT codes, if None, then rows' id will be not
                  connected with TERYT codes

    Returns the name of the file the cleaned data was saved to. Exits the
    program if hierarchy errors are found or the output file cannot be
    written.
    """
    print('Trying to clean hierarchy in data')
    csv_file.reset()
    csv_data = CsvData(csv_file)
    hierarchy_cleaner = HierarchyInserter(csv_data, json_hierarchy, schema_descr,
                                          add_id=True, teryt_data=teryt_data)
    hierarchy_cleaner.insert_hierarchy()
    if hierarchy_cleaner.all_rows_correct():
        print('All rows have correct hierarchy')
    else:
        error_file_name = consts['hierarchy_errors_name']
        print('Some errors in hierarchy have been found:')
        print('Saving them to file %s' % error_file_name)
        try:
            error_file = open(error_file_name, 'wb')
        except IOError:
            print('Can not open file %s, no information will be saved.' % error_file_name)
        else:
            # Close the log even if building or writing it fails.
            with error_file:
                error_file.write(hierarchy_cleaner.get_hierarchy_errors_log())
        exit()

    clean_data = hierarchy_cleaner.get_modified_rows()
    print('Successfully cleaned hierarchy')

    csv_name = csv_file.get_filename()
    # str.rstrip('.csv') removes any trailing '.', 'c', 's', 'v' characters
    # (e.g. 'docs.csv' -> 'do'), so the suffix is cut off explicitly instead.
    suffix = '.csv'
    if csv_name.endswith(suffix):
        base_name = csv_name[:-len(suffix)]
    else:
        base_name = csv_name
    new_csv_name = base_name + '_upload.csv'

    print('Trying to save data with cleaned hierarchy in %s' % new_csv_name)
    try:
        data_file = Data(clean_data, new_csv_name)
        data_file.save()
    except IOError:
        exit('Error: can\'t open file %s. Exiting now.' % new_csv_name)
    else:
        print('Successfully saved data in %s' % new_csv_name)

    return new_csv_name
Example #3
0
 def change_file(self, fname=None):
     """Convert the loaded data to the new format and write it out.

     The header is converted with self.change_header and every row with
     self.change_row; the result is saved without any quoting.

     Arguments:
     fname -- name of the output file; when None, it is built from
              self.name with its last four characters replaced by
              '_mod.csv'
     """
     table = [self.change_header(self.data.get_header())]

     # Pull rows until the data source hands back an empty/None row.
     while True:
         source_row = self.data.get_next_row(row_type='list')
         if not source_row:
             break
         table.append(self.change_row(source_row))

     target_name = fname
     if target_name is None:
         target_name = self.name[:-4] + '_mod.csv'

     Data(table, target_name).save(quoting=csv.QUOTE_NONE)