def run(self):
    """Generate and save guesses for ``self.fold`` in three passes.

    The guesser class is resolved from ``self.guesser_module`` and
    ``self.guesser_class`` and loaded from its output directory. Guesses
    are then generated and saved three times: with a fold-dependent
    ``char_skip``, with ``full_question=True`` and with
    ``first_sentence=True``. The results are saved under the labels
    ``"char"``, ``"full"`` and ``"first"`` respectively.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    guesser_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num, "")
    guesser_instance = guesser_class.load(
        guesser_directory)  # type: AbstractGuesser

    # The character skip comes from a different config key depending on fold.
    if self.fold in {c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}:
        char_skip = conf["guesser_char_skip"]
    elif self.fold == c.EXPO_FOLD:
        char_skip = conf["expo_char_skip"]
    else:
        char_skip = conf["buzzer_char_skip"]

    # Each pass: (start-of-pass log message, keyword arguments forwarded to
    # generate_guesses, label handed to save_guesses).
    guess_passes = (
        (
            f"Generating and saving guesses for {self.fold} fold with char_skip={char_skip}...",
            {"char_skip": char_skip},
            "char",
        ),
        (
            f"Generating and saving guesses for {self.fold} fold with full question...",
            {"full_question": True},
            "full",
        ),
        (
            f"Generating and saving guesses for {self.fold} fold with first sentence",
            {"first_sentence": True},
            "first",
        ),
    )

    for start_message, guess_kwargs, label in guess_passes:
        log.info(start_message)
        start_time = time.time()
        guess_df = guesser_instance.generate_guesses(
            self.n_guesses, [self.fold], **guess_kwargs)
        end_time = time.time()
        elapsed = end_time - start_time
        log.info(
            f"Guessing on {self.fold} fold took {elapsed}s, saving guesses...")
        guesser_class.save_guesses(guess_df, guesser_directory, [self.fold],
                                   label)
        log.info("Done saving guesses")
def run(self):
    """Generate and save guesses for ``self.fold`` in three passes.

    The guesser class is resolved from ``self.guesser_module`` and
    ``self.guesser_class`` and loaded from its output directory. Guesses
    are then generated and saved three times: with a fold-dependent
    ``char_skip``, with ``full_question=True`` and with
    ``first_sentence=True``. The results are saved under the labels
    ``'char'``, ``'full'`` and ``'first'`` respectively.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    guesser_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num, '')
    guesser_instance = guesser_class.load(
        guesser_directory)  # type: AbstractGuesser

    # Train/dev folds use the guesser skip; every other fold uses the buzzer skip.
    if self.fold in {c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}:
        char_skip = conf['guesser_char_skip']
    else:
        char_skip = conf['buzzer_char_skip']

    # Each pass: (start-of-pass log message, keyword arguments forwarded to
    # generate_guesses, label handed to save_guesses).
    guess_passes = (
        (
            f'Generating and saving guesses for {self.fold} fold with char_skip={char_skip}...',
            {'char_skip': char_skip},
            'char',
        ),
        (
            f'Generating and saving guesses for {self.fold} fold with full question...',
            {'full_question': True},
            'full',
        ),
        (
            f'Generating and saving guesses for {self.fold} fold with first sentence',
            {'first_sentence': True},
            'first',
        ),
    )

    for start_message, guess_kwargs, label in guess_passes:
        log.info(start_message)
        start_time = time.time()
        guess_df = guesser_instance.generate_guesses(
            self.n_guesses, [self.fold], **guess_kwargs)
        end_time = time.time()
        elapsed = end_time - start_time
        log.info(
            f'Guessing on {self.fold} fold took {elapsed}s, saving guesses...')
        guesser_class.save_guesses(guess_df, guesser_directory, [self.fold],
                                   label)
        log.info('Done saving guesses')
def run(self):
    """Copy results to the reporting directory, build reports, then delete the model.

    The parameter pickle and the per-fold guess pickles are copied into the
    reporting directory, a report is created for every fold, and finally
    the guesser output directory and the guess files are removed.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    reporting_directory = AbstractGuesser.reporting_path(
        self.guesser_module, self.guesser_class, self.config_num, "")

    # Huge parameter sweeps on SLURM can easily exhaust /fs/ storage. Only the
    # results matter here, so they are copied out and the model is deleted
    # afterwards. Use the regular GuesserReport to preserve the model.
    guesser_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num, "")
    param_path = AbstractGuesser.output_path(
        self.guesser_module,
        self.guesser_class,
        self.config_num,
        "guesser_params.pickle",
    )

    # The expo fold is only included when its dataset file is present.
    folds = [c.GUESSER_DEV_FOLD, c.GUESSER_TEST_FOLD]
    if os.path.exists(c.QANTA_EXPO_DATASET_PATH):
        folds.append(c.EXPO_FOLD)

    guesses_files = [
        file_name
        for f in folds
        for file_name in (
            f"guesses_char_{f}.pickle",
            f"guesses_full_{f}.pickle",
            f"guesses_first_{f}.pickle",
        )
    ]
    guesses_paths = []
    for file_name in guesses_files:
        guesses_paths.append(
            AbstractGuesser.output_path(self.guesser_module,
                                        self.guesser_class, self.config_num,
                                        file_name))

    # Copy the parameters first, then every guess file.
    log.info(f'Running: "cp {param_path} {reporting_directory}"')
    shell(f"cp {param_path} {reporting_directory}")
    for g_path in guesses_paths:
        log.info(f'Running: "cp {g_path} {reporting_directory}"')
        shell(f"cp {g_path} {reporting_directory}")

    report_builder = guesser_class(self.config_num)
    for fold in folds:
        report_builder.create_report(reporting_directory, fold)

    # Clean up: remove the model directory, then each guess file.
    log.info(f'Running: "rm -rf {guesser_directory}"')
    shell(f"rm -rf {guesser_directory}")
    for g_path in guesses_paths:
        shell(f"rm -f {g_path}")
def output(self):
    """Return the targets this task produces.

    The list holds a target for the guesser output directory, a target for
    ``guesser_params.pickle``, and then one target per path returned by
    ``guesser_class.files`` for that directory.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)

    def path_for(name):
        # Resolve ``name`` inside this guesser's output location.
        return AbstractGuesser.output_path(self.guesser_module,
                                           self.guesser_class, name)

    model_targets = []
    for model_file in guesser_class.files(path_for('')):
        model_targets.append(LocalTarget(model_file))

    fixed_targets = [
        LocalTarget(path_for('')),
        LocalTarget(path_for('guesser_params.pickle')),
    ]
    return fixed_targets + model_targets
def run(self):
    """Train the guesser, save it, and pickle its parameters.

    The elapsed wall-clock time of the ``train`` call (including fetching
    the training data) is stored under ``'training_time'`` in the pickled
    parameters.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    guesser = guesser_class()  # type: AbstractGuesser
    dataset = guesser.qb_dataset()

    train_started = time.time()
    guesser.train(dataset.training_data())
    train_finished = time.time()

    model_directory = AbstractGuesser.output_path(self.guesser_module,
                                                  self.guesser_class, '')
    guesser.save(model_directory)

    params = guesser.parameters()
    params['training_time'] = train_finished - train_started

    params_path = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, 'guesser_params.pickle')
    with open(params_path, 'wb') as params_file:
        pickle.dump(params, params_file)
def run(self):
    """Train the guesser for ``self.config_num``, save it, and pickle its parameters.

    The elapsed wall-clock time of the ``train`` call (including fetching
    the training data) is stored under ``"training_time"`` in the pickled
    parameters.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    guesser = guesser_class(self.config_num)  # type: AbstractGuesser
    dataset = guesser.qb_dataset()

    train_started = time.time()
    guesser.train(dataset.training_data())
    train_finished = time.time()

    model_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num, "")
    guesser.save(model_directory)

    params = guesser.parameters()
    params["training_time"] = train_finished - train_started

    params_path = AbstractGuesser.output_path(
        self.guesser_module,
        self.guesser_class,
        self.config_num,
        "guesser_params.pickle",
    )
    with open(params_path, "wb") as params_file:
        pickle.dump(params, params_file)
def run(self):
    """Load the saved guesser and generate and save guesses for ``self.fold``.

    The ``word_skip`` passed to ``generate_guesses`` is read from
    ``conf['guesser_word_skip']`` for the guesser train/dev folds and from
    ``conf['buzzer_word_skip']`` for any other fold.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    model_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, '')
    guesser = guesser_class.load(model_directory)  # type: AbstractGuesser

    guesser_folds = {c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}
    skip_key = ('guesser_word_skip'
                if self.fold in guesser_folds else 'buzzer_word_skip')
    word_skip = conf[skip_key]

    start_template = 'Generating and saving guesses for {} fold with word_skip={}...'
    log.info(start_template.format(self.fold, word_skip))

    started = time.time()
    guesses = guesser.generate_guesses(self.n_guesses, [self.fold],
                                       word_skip=word_skip)
    finished = time.time()

    done_template = 'Guessing on {} fold took {}s, saving guesses...'
    log.info(done_template.format(self.fold, finished - started))

    guesser_class.save_guesses(guesses, model_directory, [self.fold])
    log.info('Done saving guesses')
def requires(self):
    """Yield the tasks this task depends on.

    ``DownloadData()`` is always yielded. When both
    ``self.dependency_class`` and ``self.dependency_module`` are set, an
    instance of the class they resolve to is yielded as well.
    """
    yield DownloadData()

    # No extra dependency unless both the class and the module are given.
    if self.dependency_class is None or self.dependency_module is None:
        return

    dependency_task = get_class(self.dependency_module, self.dependency_class)
    yield dependency_task()
def run(self):
    """Create the report for ``self.fold`` in the guesser output directory.

    A guesser is constructed from ``self.config_num`` (not loaded from
    disk) and asked to create the report.
    """
    guesser_class = get_class(self.guesser_module, self.guesser_class)
    report_directory = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num, '')
    guesser_class(self.config_num).create_report(report_directory, self.fold)