class CacheFilter:
    """
    A class which takes an exporter and returns records which have changed
    by comparing them against cached data.

    Iterating over an instance yields ``(row, changed_values)`` pairs for
    every row whose monitored fields differ from the values stored in the
    cache. Iteration never writes to the cache; call ``update_cache`` for
    each row that has been dealt with and ``commit`` when finished.
    """

    def __init__(self, export_scheme, options):
        """
        export_scheme -- object providing ``row_gen()`` and ``include_forms``
        options       -- object providing ``state_file``, ``monitored_fields``
                         and ``dry_run``
        """
        self.export_scheme = export_scheme
        self.options = options

        # Load our cached state if given
        if not options.state_file:
            # No state file: fake being a shelf with an in-memory mapping
            # whose close() does nothing, so commit() works either way.
            self.cached_state = UserDict()
            self.cached_state.close = lambda: None
        else:
            self.cached_state = shelve.open(options.state_file)

    def extract_row_key(self, row):
        """
        Extract a tuple from a row to uniquely identify it.

        The key starts with the row's ``case_id`` and ``syndrome_id``. For
        each form in ``export_scheme.include_forms`` it then appends either
        the single ``<form>.form_id`` value or, when that column is absent,
        the numbered ``<form>.form_id.0``, ``<form>.form_id.1``, ... values
        until the first missing index.
        """
        key = [row['case_id'], row['syndrome_id']]
        for form in self.export_scheme.include_forms:
            base = '%s.form_id' % form
            if base in row:
                key.append(row[base])
                continue

            # Numbered columns: collect them in order until one is missing.
            n = 0
            numbered = '%s.%d' % (base, n)
            while numbered in row:
                key.append(row[numbered])
                n += 1
                numbered = '%s.%d' % (base, n)
        return tuple(key)

    def __iter__(self):
        """
        Yield ``(row, changed_values)`` for each row that differs from the
        cache.

        ``changed_values`` is a list of ``(field, cached_value, new_value)``
        tuples, one per monitored field whose value is not equal to the
        cached one. Rows with no cache entry are compared against ``None``
        for every field. Rows with no changes are skipped.
        """
        fields = self.options.monitored_fields

        # Run over the rows checking against cached state
        for row in dictifying_filter_gen(self.export_scheme.row_gen()):
            row_key_repr = repr(self.extract_row_key(row))

            # Copy so that padding never mutates the object held by the cache.
            cached_values = list(self.cached_state.get(row_key_repr, ()))
            # An entry written when fewer fields were monitored is shorter
            # than the current field list; treat the missing tail as
            # "never seen" instead of failing with IndexError.
            cached_values.extend([None] * (len(fields) - len(cached_values)))

            changed_values = [
                (f, cached, row[f])
                for f, cached in zip(fields, cached_values)
                if row[f] != cached
            ]

            if changed_values:
                yield (row, changed_values)

    def update_cache(self, row):
        """
        Store the row's current monitored-field values in the cache.

        The values are stored as a list, in ``options.monitored_fields``
        order, under the ``repr`` of the row key. Nothing is written when
        ``options.dry_run`` is set.
        """
        row_key_repr = repr(self.extract_row_key(row))
        current_values = [row[f] for f in self.options.monitored_fields]

        if not self.options.dry_run:
            self.cached_state[row_key_repr] = current_values

    def commit(self):
        """ Close the cached state (a no-op for the in-memory stand-in). """
        self.cached_state.close()
class CacheFilter:
    """
    A class which takes an exporter and returns records which have changed
    by comparing them against cached data.

    Iterating over an instance yields ``(row, changed_values)`` pairs for
    every row whose monitored fields differ from the values stored in the
    cache. Iteration never writes to the cache; call ``update_cache`` for
    each row that has been dealt with and ``commit`` when finished.
    """

    def __init__(self, export_scheme, options):
        """
        export_scheme -- object providing ``row_gen()`` and ``include_forms``
        options       -- object providing ``state_file``, ``monitored_fields``
                         and ``dry_run``
        """
        self.export_scheme = export_scheme
        self.options = options

        # Load our cached state if given
        if not options.state_file:
            # No state file: fake being a shelf with an in-memory mapping
            # whose close() does nothing, so commit() works either way.
            self.cached_state = UserDict()
            self.cached_state.close = lambda: None
        else:
            self.cached_state = shelve.open(options.state_file)

    def extract_row_key(self, row):
        """
        Extract a tuple from a row to uniquely identify it.

        The key starts with the row's ``case_id`` and ``syndrome_id``. For
        each form in ``export_scheme.include_forms`` it then appends either
        the single ``<form>.form_id`` value or, when that column is absent,
        the numbered ``<form>.form_id.0``, ``<form>.form_id.1``, ... values
        until the first missing index.
        """
        key = [row['case_id'], row['syndrome_id']]
        for form in self.export_scheme.include_forms:
            base = '%s.form_id' % form
            if base in row:
                key.append(row[base])
                continue

            # Numbered columns: collect them in order until one is missing.
            n = 0
            numbered = '%s.%d' % (base, n)
            while numbered in row:
                key.append(row[numbered])
                n += 1
                numbered = '%s.%d' % (base, n)
        return tuple(key)

    def __iter__(self):
        """
        Yield ``(row, changed_values)`` for each row that differs from the
        cache.

        ``changed_values`` is a list of ``(field, cached_value, new_value)``
        tuples, one per monitored field whose value is not equal to the
        cached one. Rows with no cache entry are compared against ``None``
        for every field. Rows with no changes are skipped.
        """
        fields = self.options.monitored_fields

        # Run over the rows checking against cached state
        for row in dictifying_filter_gen(self.export_scheme.row_gen()):
            row_key_repr = repr(self.extract_row_key(row))

            # Copy so that padding never mutates the object held by the cache.
            cached_values = list(self.cached_state.get(row_key_repr, ()))
            # An entry written when fewer fields were monitored is shorter
            # than the current field list; treat the missing tail as
            # "never seen" instead of failing with IndexError.
            cached_values.extend([None] * (len(fields) - len(cached_values)))

            changed_values = [
                (f, cached, row[f])
                for f, cached in zip(fields, cached_values)
                if row[f] != cached
            ]

            if changed_values:
                yield (row, changed_values)

    def update_cache(self, row):
        """
        Store the row's current monitored-field values in the cache.

        The values are stored as a list, in ``options.monitored_fields``
        order, under the ``repr`` of the row key. Nothing is written when
        ``options.dry_run`` is set.
        """
        row_key_repr = repr(self.extract_row_key(row))
        current_values = [row[f] for f in self.options.monitored_fields]

        if not self.options.dry_run:
            self.cached_state[row_key_repr] = current_values

    def commit(self):
        """ Close the cached state (a no-op for the in-memory stand-in). """
        self.cached_state.close()