예제 #1
0
def verify_data( data, columns, hierarchy_indexes, start_ind ):
    ''' Detect eventual errors in the given file (parts inconsistent with
        columns and hierarchy). Return found errors in the list.

        Arguments:
        data              -- iterable of data rows to verify
        columns           -- column descriptions; passed to get_row_types()
                             to derive the expected type of each field
        hierarchy_indexes -- indexes of the hierarchy fields within a row
        start_ind         -- index of the first row of data (used when
                             reporting an error's row number)
    '''
    row_types = get_row_types( columns, hierarchy_indexes )
    expected_len = len( row_types )

    errors = []

    log.description('Verifying data')
    for (i, row) in enumerate(data, start_ind):
        # Progress indicator; TODO log it per 1% instead of per 1000 rows
        if i % 1000 == 0:
            log.description(i)

        # Row must have exactly one field per expected column
        if len( row ) != expected_len:
            errors.append( bad_len( i, len(row), expected_len ) )

        # Field values must match the expected types
        if not are_fields_correct( row_types, row ):
            errors.append( bad_fields( i, row_types, row ) )

        # At least one hierarchy field must be non-empty
        if is_row_hierarchy_empty( row, hierarchy_indexes ):
            errors.append( empty_hierarchy( i ) )

    if errors:
        log.error('%s error(s) found' % len(errors))
    else:
        log.description('Finished with no errors')

    return errors
예제 #2
0
    def upload(self, has_header=True, visible=True, restore=False):
        '''Main method of Uploader. Checks db counters, if any inconsistency
           is found, then ask if it should be removed. After that, checks data
           that is about to be uploaded. After this attempts to upload data.
           If any error occurs during that process, then removes uploaded data to
           that moment. Returns tuple containing a boolean value that tells if it
           succeeded and the name of the new endpoint (or error details on failure).
           There are 3 optional parameters:
           has_header -- if data file comes with header,
           visible    -- if endpoint should be visible after upload,
           restore    -- if state of db should be restored to the state pointed
                         in debug_restore() method. Use with CAUTION!
        '''
        # restore db state to a state before a recent data insertion
        if restore:
            self.debug_restore()

        # Snapshot db counters so a failed upload can be rolled back later
        init_endpoint_id = self.db.get_max_endpoint()
        init_dbtree_id   = self.db.get_max_dbtree_id()
        init_data_id     = self.db.get_max_data_id()

        # TODO move it to db module. data from db module should come correct!
        log.section('DB counters correctness')
        self.check_db_counters(init_endpoint_id, init_dbtree_id, init_data_id)
        log.end_section()

        # TODO move it to Meta class constructor!
        # Check if parents, columns and hierarchy from meta is correct
        log.section('Metadata correctness')
        try:
            log.description('Verifying metadata')
            self.check_correctness()
        except UploadDataException as e:
            log.error(e.get_error())
            return ( False, e.get_error() )
        log.end_section()

        # Check data, if any error is in data, stop processing and return list with errors
        log.section('Data correctness')
        errors = self.find_errors(has_header)
        if errors:
            return (False, errors)
        log.end_section()

        endpoint = None
        log.section('Data insertion')
        if self.debug:
            # In debug mode let exceptions propagate so the failure is visible
            endpoint = self.insert_data_into_db(has_header, visible)
        else:
            try:
                endpoint = self.insert_data_into_db( has_header, visible )
            except UploadDataException as e:
                log.error('Failed.')
                log.error(e)
                log.end_section()

                # cleanup after unsuccessful upload
                self.remove_uploaded( init_endpoint_id, init_dbtree_id, init_data_id )
                # Exit with a non-zero status: the upload failed. (Was exit(0),
                # which wrongly reported success to the calling shell.)
                exit(1)

        log.description('Done!')
        log.end_section()

        return (True, endpoint)