Beispiel #1
0
def measure_init( db ):
  """Compute the bounding box of `db` and publish per-column scale factors.

     The box is obtained from hint_tools.bounding_hint( *db ).  As a side
     effect the following module-level names are (re)bound:

       distance1d            function ( x, y, z ) -> abs( ( x - y ) / z )
       db_scale              tuple with the extent (upper - lower) of each column
       discretization_const  number of columns times hint_tools.log( len( db ) )

     volume.epsilon is set to float_info.epsilon raised to the power
     1 / (number of columns).

     Returns the pair ( lower bounds, upper bounds ) exactly as delivered by
     hint_tools.bounding_hint."""
  global db_scale, discretization_const, distance1d

  def distance1d( x, y, z ):
    # Separation of x and y along one axis, expressed in units of z.
    return abs( ( x - y ) / z )

  lower, upper = hint_tools.bounding_hint( *db )

  # Width of the bounding box along every column.
  extents = [ hi - lo for lo, hi in zip( lower, upper ) ]
  db_scale = tuple( extents )
  dims = len( db_scale )

  discretization_const = dims * hint_tools.log( len( db ) )
  volume.epsilon = float_info.epsilon ** ( 1 / dims )
  return lower, upper
Beispiel #2
0
def measure_init( db ):
  """Establish a bounding box around the database and normalizing factors for
     all columns, so that distances become comparable.

     The first `numeric_cols` columns (a module-level name read here) are
     handled as numeric; every remaining column is summed row-wise to obtain
     a count n, which presumably means those columns hold 0/1 values.

     Side effects -- the following module-level names are (re)bound:

       distance1d            function ( x, y, z ) -> abs( ( x - y ) / z )
       db_scale              extent (upper - lower) of each numeric column
       discretization_const  numeric_cols * log( len( db ) ) plus a constant
                             ( log( 6 ) / 2 - log( 2 ) ) per non-numeric column
       entropies             one value per non-numeric column:
                             log2( N ) - ( n log2( n ) + ( N - n ) log2( N - n ) ) / N
                             with N = len( db ), i.e. the entropy in bits of a
                             two-valued column with n ones (assuming
                             hint_tools.log( x, 2 ) is the base-2 logarithm)

     volume.epsilon is set to float_info.epsilon ** ( 1 / numeric_cols ).

     Returns the pair ( db_lb, db_ub ) delivered by hint_tools.bounding_hint,
     covering all columns."""
  global db_scale, discretization_const, distance1d, entropies
  def distance1d( x, y, z ): return abs( ( x - y ) / z )

  def count_log_count( k ):
    # k * log2( k ) with the usual convention 0 * log( 0 ) = 0, so that a
    # column consisting only of zeros or only of ones gets entropy 0 instead
    # of evaluating the logarithm at 0.
    return k * hint_tools.log( k, 2 ) if k else 0

  db_lb, db_ub = hint_tools.bounding_hint( *db )
  db_scale = tuple( y - x for x, y in zip( db_lb[:numeric_cols], db_ub[:numeric_cols] ) )

  # db_scale only covers the numeric columns, so the non-numeric columns must
  # be counted from the full bounding box; len( db_scale ) - numeric_cols
  # would never be positive and would drop their contribution.
  other_cols = len( db_lb ) - numeric_cols
  discretization_const = numeric_cols * hint_tools.log( len( db ) ) + \
                         other_cols * ( hint_tools.log( 6 ) / 2 - hint_tools.log( 2 ) )
  N = len( db )
  entropies = tuple( hint_tools.log( N, 2 ) \
                     - ( count_log_count( n ) + count_log_count( N - n ) ) / N \
                     for n in map( sum, tuple( zip( *db ) )[numeric_cols:] ) )
  volume.epsilon = float_info.epsilon ** ( 1 / numeric_cols )
  return db_lb, db_ub