Ejemplo n.º 1
0
    def start(self, n_jobs=1, pre_dispatch='2*n_jobs'):
        """Run the experiment end to end and report the trial scores.

        Steps, in order: apply the configuration options, load and vectorize
        the dataset (both results are stored in ``self.data``), build the
        cross-validation splits, fit the expert on the training split, create
        one learner per split, run ``main_loop_jobs`` once per split through
        ``Parallel``, and pass the collected scores to ``report_results``.

        Args:
            n_jobs: forwarded unchanged to ``Parallel(n_jobs=...)``.
            pre_dispatch: forwarded unchanged to
                ``Parallel(pre_dispatch=...)``.

        Returns:
            None. The scores are handed to ``self.report_results``.
        """
        self._setup_options(self.config)
        # Parenthesized single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(self.get_name())
        t0 = time()
        self.data = datautil.load_dataset(
            self.dataname, self.data_path, categories=self.data_cat,
            rnd=self.seed, shuffle=True, percent=self.split,
            keep_subject=True)
        self.print_lap("Loaded", t0)

        self.data = self.vectorize(self.data)

        cv = self.cross_validation_data(self.data, folds=self.folds,
                                        trials=self.trials, split=self.split)

        # One seed per entry of cv: 10, 20, 30, ...
        seeds = np.arange(len(cv)) * 10 + 10

        # The expert is sized by (number of training targets,
        # largest value in train.sizes).
        expert_size = (len(self.data.train.target),
                       self.data.train.sizes.max())
        expert = exputil.get_expert(
            cfgutil.get_section_options(self.config, 'expert'),
            size=expert_size)

        expert.fit(self.data.train.bow, y=self.data.train.target,
                   vct=self.vct)

        lrnr_setup = {'vct': self.vct, "sent_tk": self.sent_tokenizer,
                      "cost_model": self.cost_model,
                      'validation_set': self.validation_set}

        lrnr_type = cfgutil.get_section_option(self.config, 'learner', 'type')

        neu_threshold = cfgutil.get_section_option(self.config, 'expert',
                                                   'threshold')

        # These learner types are additionally given the expert's oracle as
        # their snippet model, plus the expert threshold.
        if lrnr_type in ['utility-cheat', 'const-cheat', 'const-cheat-noisy']:
            lrnr_setup.update({'snip_model': expert.oracle,
                               'threshold': neu_threshold})

        learner_options = cfgutil.get_section_options(self.config, 'learner')
        learners = [exputil.get_learner(learner_options, seed=s, **lrnr_setup)
                    for s in seeds]
        self.print_lap("\nPreprocessed", t0)
        # ===================================
        parallel = Parallel(n_jobs=n_jobs, verbose=True,
                            pre_dispatch=pre_dispatch)
        # Job t gets learner t together with the two elements of the t-th
        # entry of cv and the trial index itself.
        scores = parallel(
            delayed(self.main_loop_jobs, check_pickle=False)(
                learners[t], expert, self.budget, self.bootstrap_size,
                self.data, tr[0], tr[1], t)
            for t, tr in enumerate(cv))
        # ===================================

        self.print_lap("\nDone trials", t0)

        # save the results
        self.report_results(scores)
Ejemplo n.º 2
0
 def get_name(self):
     """Return the experiment name built from the dataset and learner options.

     The name has the form
     ``data-<dataname>-lrn-<type>-ut-<utility>-snip-<snippet>-cal-<calibration><fileprefix>``
     where the middle fields come from the 'learner' config section and
     ``fileprefix`` comes from the 'experiment' section; the prefix is
     appended directly, with no separator before it.
     """
     learner_opts = cfgutil.get_section_options(self.config, 'learner')
     suffix = cfgutil.get_section_option(self.config, 'experiment', 'fileprefix')

     # Collect the fields in the exact order the template consumes them.
     fields = [self.dataname]
     fields += [learner_opts[key]
                for key in ('type', 'utility', 'snippet', 'calibration')]
     fields.append(suffix)

     return "data-{}-lrn-{}-ut-{}-snip-{}-cal-{}{}".format(*fields)
Ejemplo n.º 3
0
 def get_name(self):
     """Return the experiment name built from the dataset and learner options.

     The name has the form
     ``data-<dataname>-lrn-<type>-ut-<loss_function>-<fileprefix>``: ``type``
     and ``loss_function`` are read from the 'learner' config section and
     ``fileprefix`` from the 'experiment' section.
     """
     learner_cfg = cfgutil.get_section_options(self.config, 'learner')
     file_prefix = cfgutil.get_section_option(self.config, 'experiment', 'fileprefix')
     return "data-{}-lrn-{}-ut-{}-{}".format(
         self.dataname,
         learner_cfg['type'],
         learner_cfg['loss_function'],
         file_prefix,
     )