Example #1
   def run(self):
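      """Benchmark the forward mapping's training time, accuracy, and query
      time for every regression/parameter combination and training size, then
      benchmark the reverse mapping's training time, accuracy, and query time
      at each granularity."""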
      print >> sys.stderr, "\nLoading data set <%s>" % self.datafile
      full_data = amf.data.load(self.datafile)

      ###

      print >> sys.stderr, "\nTesting FM Training Time"
      total = len([ x for x in self.each_combo()])
      count = 0
      for (reg, params), tsize in self.each_combo():
         print >> sys.stderr, " %d/%d " % (count, total),
         count += 1

         label = labelize(reg, params)

         for i in xrange(FM_TRAINING_ITERATIONS):
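            # random_subsets returns one subset per requested size; the trailing
            # comma unpacks the single subset generated here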
            dataset, = amf.data.random_subsets(full_data, [tsize])

            fm = amf.ForwardMapping(reg, params, self.num_dep)

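            # time only the call to train()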
            t = misc.StopWatch()
            fm.train(dataset)
            t = t.read()
            #print >> sys.stderr, '   Regression <%s> took %.2f seconds to train on tsize=<%d>' \
            #   % (label, t, tsize)

            self.results.fm_report(label, tsize, FMTRAININGTIME, t)

      ###

      print >> sys.stderr, "\nCreating training sets and validation set"
      sets = amf.data.random_subsets(full_data, self.training_sizes + [self.validation_size])
      training_sets = sets[:-1]
      validation_set = sets[-1]

      ###

      print >> sys.stderr, "\nTesting the accuracy and speed of the forward mapping"
      total = len([ x for x in self.each_combo()])
      count = 0
      for (reg, params), tsize in self.each_combo():
         print >> sys.stderr, " %d/%d " % (count, total),
         count += 1

         label = labelize(reg, params)

         for i in xrange(FM_QUERY_ITERATIONS):
            #print >> sys.stderr, "   Testing <%s> with dataset of size %d" % (label, tsize)
            dataset, val = amf.data.random_subsets(full_data, [tsize, self.validation_size])

            # might as well measure the training time here, too
            fm = amf.ForwardMapping(reg, params, self.num_dep)
            t = misc.StopWatch()
            fm.train(dataset)
            t = t.read()
            #print >> sys.stderr, "      Regression <%s> took %.2f seconds to train on tsize=<%d>" \
            #   % (label, t, tsize)

            self.results.fm_report(label, tsize, FMTRAININGTIME, t)

            #print >> sys.stderr, "      Running queryies..."
            for config, yval in val:
               t = misc.StopWatch()
               p = fm.predict(config)
               t = t.read()
               deviation = tuple( pv - yvalv  for pv,  yvalv in zip(p, yval) )
               self.results.fm_report(label, tsize, FMACCURACY, deviation)
               self.results.fm_report(label, tsize, FMQUERYTIME, t)
            #print >> sys.stderr, "      Done"

         #print >> sys.stderr, "      Average Error:", \
         #      self.results.average(label, tsize, FMACCURACY, m = (lambda l : tuple( abs(p) for p in l )) )
         #print >> sys.stderr, "      Average Query Time:", \
         #      self.results.average(label, tsize, FMQUERYTIME)
      ###

      
      # use the largest training size for this experiment
      tsize = max(self.training_sizes)

      # use kNN for this experiment
      dataset, validation = amf.data.random_subsets(full_data, [tsize, self.validation_size])
      fm = amf.ForwardMapping(amf.regression.kNN, [], self.num_dep)
      fm.train(dataset)
      print >> sys.stderr, "\nTesting RM Training Time, Accuracy and Query Time"

      total = len(self.rm_granularities)
      count = 0

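      # bind rm so the `del rm` inside the loop is valid on the first pass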
      rm = None

      for gran in self.rm_granularities:
         #print >> sys.stderr, "\n\n GRANULARITY %d !!!" % gran
         print >> sys.stderr, " %d/%d " % (count, total),
         count += 1

         for i in xrange(RM_TRAINING_ITERATIONS):
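            # release the previous reverse mapping before timing the next build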
            del rm
            # Training Time Experiment
            t = misc.StopWatch()


            # FIXME, this should be a configuration, instead of a manual change!
            rm = amf.ReverseMapping(fm, [(0.0, 1.0), (0.0, 1.0)], gran)
            t = t.read()
            #print >> sys.stderr, "took %.2f seconds to train" % t

            self.results.rm_report(gran, RMTRAININGTIME, t)

            # Error vs. Forward Mapping
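            # compare each simplex's average corner value with the forward
            # mapping's prediction at the average corner configuration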
            for s in rm.simplexes:
               # find the average config
               configs = [ p[0] for p in s.corners]
               avg_config = misc.col_average(configs)

               # find the average value of the SLPs
               slps = [ p[1] for p in s.corners ]
               avg_values = misc.col_average(slps)
            
               deviations = misc.list_sub(avg_values, fm.predict(avg_config))
               #print >> sys.stderr, "deviated %s from the fm" % repr(deviations)

               self.results.rm_report(gran, RMACCURACYFM, deviations)

            # Error vs. ABM
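            # record the distance from each validation point to the reverse
            # mapping, when one is defined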
            for config, slp in validation:
               # this only does one SLP right now! FIXME
               dist = rm.distance_to(0, config, slp)

               if dist is not None:
                  #print >> sys.stderr, "configuration %s with value %s is %.3f away from the RM" \
                  #   % (repr(config), repr(slp[0]), dist)

                  self.results.rm_report(gran, RMACCURACYABM, dist)


            # Query Time
            for config, slp in validation:
               t = misc.StopWatch()
               rm.all_intersections(slp)
               t = t.read()
               #print >> sys.stderr, "query too %.2f seconds" % t

               self.results.rm_report(gran, RMQUERYTIME, t)

      ###

      print >> sys.stderr, "\nDone"
   def run(self):
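      """Benchmark only the reverse mapping: train a kNN forward mapping on the
      largest training size, then for each granularity measure RM construction
      time, error relative to the forward mapping on a random sample of
      simplexes, and query time over the validation set."""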
      print >> sys.stderr, "\nLoading data set <%s>" % self.datafile
      full_data = amf.data.load(self.datafile)

      ###
      
      # use the largest training size for this experiment
      tsize = max(self.training_sizes)

      # use kNN for this experiment
      print>> sys.stderr, "\nGenerating Data Sets"
      dataset, validation = amf.data.random_subsets(full_data, [tsize, self.validation_size])

      print >> sys.stderr, "\n Training the forward mapping"
      fm = amf.ForwardMapping(amf.regression.kNN, [], self.num_dep)
      fm.train(dataset)
      print >> sys.stderr, "\nTesting RM Training Time, Accuracy and Query Time"

      total = len(self.rm_granularities)
      count = 0

      rm = None

      for gran in self.rm_granularities:
         #print >> sys.stderr, "\n\n GRANULARITY %d !!!" % gran
         print >> sys.stderr, " %d/%d " % (count, total),
         count += 1

         for i in xrange(RM_TRAINING_ITERATIONS):
            del rm
            # Training Time Experiment
            t = misc.StopWatch()


            # FIXME, this should be a configuration, instead of a manual change!
            rm = amf.ReverseMapping(fm, [(0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0)], gran)
            t = t.read()
            #print >> sys.stderr, "took %.2f seconds to train" % t


            # sample 200 simplexes at random rather than checking every one
            subset = [ random.choice(rm.simplexes) for _ in range(200) ]

            # Error vs. Forward Mapping
            for s in subset:
               # find the average config
               configs = [ p[0] for p in s.corners]
               avg_config = misc.col_average(configs)

               # find the average value of the SLPs
               slps = [ p[1] for p in s.corners ]
               avg_values = misc.col_average(slps)
            
               deviations = misc.list_sub(avg_values, fm.predict(avg_config))
               #print >> sys.stderr, "deviated %s from the fm" % repr(deviations)

               self.results.rm_report(gran, RMACCURACYFM, deviations)

            print >> sys.stderr, "\nTesting query time"

            # Query Time
            for config, slp in validation:
               t = misc.StopWatch()
               rm.all_intersections(slp)
               t = t.read()
               #print >> sys.stderr, "query too %.2f seconds" % t

               self.results.rm_report(gran, RMQUERYTIME, t)

      ###

      print >> sys.stderr, "\nDone"