Beispiel #1
0
    def run(self):
        """Generate and save three sets of guesses for ``self.fold``.

        The guesser is loaded from its output directory and
        ``generate_guesses`` is called three times: once with a
        fold-dependent ``char_skip``, once with ``full_question=True`` and
        once with ``first_sentence=True``. Each result is handed to
        ``save_guesses`` with the tag "char", "full" or "first".
        """
        guesser_class = get_class(self.guesser_module, self.guesser_class)
        guesser_directory = AbstractGuesser.output_path(
            self.guesser_module, self.guesser_class, self.config_num, "")
        guesser_instance = guesser_class.load(
            guesser_directory)  # type: AbstractGuesser

        # The skip value is read from a different config key depending on
        # which fold is being processed.
        if self.fold in {c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}:
            skip_key = "guesser_char_skip"
        elif self.fold == c.EXPO_FOLD:
            skip_key = "expo_char_skip"
        else:
            skip_key = "buzzer_char_skip"
        char_skip = conf[skip_key]

        def guess_and_save(announcement, tag, **guess_options):
            # One timed generate-then-save pass; ``tag`` is forwarded to
            # save_guesses as its last argument.
            log.info(announcement)
            began = time.time()
            guesses = guesser_instance.generate_guesses(
                self.n_guesses, [self.fold], **guess_options)
            elapsed = time.time() - began
            log.info(
                f"Guessing on {self.fold} fold took {elapsed}s, saving guesses...")
            guesser_class.save_guesses(
                guesses, guesser_directory, [self.fold], tag)
            log.info("Done saving guesses")

        guess_and_save(
            f"Generating and saving guesses for {self.fold} fold with char_skip={char_skip}...",
            "char",
            char_skip=char_skip,
        )
        guess_and_save(
            f"Generating and saving guesses for {self.fold} fold with full question...",
            "full",
            full_question=True,
        )
        guess_and_save(
            f"Generating and saving guesses for {self.fold} fold with first sentence",
            "first",
            first_sentence=True,
        )
Beispiel #2
0
    def run(self):
        """Produce and store char-skip, full-question and first-sentence guesses.

        Loads the guesser from its output directory, picks ``char_skip``
        from the config based on ``self.fold``, then performs three timed
        ``generate_guesses`` / ``save_guesses`` rounds tagged 'char',
        'full' and 'first'.
        """
        guesser_class = get_class(self.guesser_module, self.guesser_class)
        guesser_directory = AbstractGuesser.output_path(
            self.guesser_module, self.guesser_class, self.config_num, '')
        guesser_instance = guesser_class.load(
            guesser_directory)  # type: AbstractGuesser

        # Train/dev folds use the guesser setting; every other fold uses
        # the buzzer setting.
        is_guesser_fold = self.fold in {c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}
        char_skip = conf['guesser_char_skip'] if is_guesser_fold else conf['buzzer_char_skip']

        # (start-of-pass log message, save tag, keyword options for generate_guesses)
        rounds = (
            (
                f'Generating and saving guesses for {self.fold} fold with char_skip={char_skip}...',
                'char',
                {'char_skip': char_skip},
            ),
            (
                f'Generating and saving guesses for {self.fold} fold with full question...',
                'full',
                {'full_question': True},
            ),
            (
                f'Generating and saving guesses for {self.fold} fold with first sentence',
                'first',
                {'first_sentence': True},
            ),
        )

        for announcement, tag, options in rounds:
            log.info(announcement)
            t0 = time.time()
            guesses = guesser_instance.generate_guesses(
                self.n_guesses, [self.fold], **options)
            t1 = time.time()
            elapsed = t1 - t0
            log.info(
                f'Guessing on {self.fold} fold took {elapsed}s, saving guesses...')
            guesser_class.save_guesses(
                guesses, guesser_directory, [self.fold], tag)
            log.info('Done saving guesses')
Beispiel #3
0
    def run(self):
        """Copy guesser results to the reporting directory, build reports, delete the model.

        In huge parameter sweeps on SLURM it is easy to accidentally run out
        of /fs/ storage. Only the results are needed here, so they are copied
        out first and the guesser directory is removed afterwards. Use the
        regular GuesserReport when the model should be preserved.
        """
        guesser_class = get_class(self.guesser_module, self.guesser_class)
        path_args = (self.guesser_module, self.guesser_class, self.config_num)

        reporting_directory = AbstractGuesser.reporting_path(*path_args, "")
        guesser_directory = AbstractGuesser.output_path(*path_args, "")
        param_path = AbstractGuesser.output_path(
            *path_args, "guesser_params.pickle")

        # The expo fold is only processed when its dataset file exists.
        folds = [c.GUESSER_DEV_FOLD, c.GUESSER_TEST_FOLD]
        if os.path.exists(c.QANTA_EXPO_DATASET_PATH):
            folds.append(c.EXPO_FOLD)

        # Three guess files (char / full / first) per fold, in fold order.
        guesses_paths = [
            AbstractGuesser.output_path(*path_args, file_name)
            for fold in folds
            for file_name in (
                f"guesses_char_{fold}.pickle",
                f"guesses_full_{fold}.pickle",
                f"guesses_first_{fold}.pickle",
            )
        ]

        # Copy the parameter file first, then every guess file.
        for source_path in [param_path, *guesses_paths]:
            log.info(f'Running: "cp {source_path} {reporting_directory}"')
            shell(f"cp {source_path} {reporting_directory}")

        reporter = guesser_class(self.config_num)
        for fold in folds:
            reporter.create_report(reporting_directory, fold)

        # Clean up: remove the guesser directory, then each guess file.
        log.info(f'Running: "rm -rf {guesser_directory}"')
        shell(f"rm -rf {guesser_directory}")
        for leftover in guesses_paths:
            shell(f"rm -f {leftover}")
Beispiel #4
0
    def output(self):
        """Return the list of ``LocalTarget`` objects for this guesser.

        The list starts with targets for the guesser output path built with
        an empty file name and for 'guesser_params.pickle', followed by one
        target per entry returned by the guesser class's ``files`` method.
        """
        def path_for(file_name):
            # Output path for ``file_name`` under this module/class pair.
            return AbstractGuesser.output_path(self.guesser_module,
                                               self.guesser_class, file_name)

        guesser_class = get_class(self.guesser_module, self.guesser_class)

        model_targets = []
        for model_file in guesser_class.files(path_for('')):
            model_targets.append(LocalTarget(model_file))

        fixed_targets = [
            LocalTarget(path_for('')),
            LocalTarget(path_for('guesser_params.pickle')),
        ]
        return fixed_targets + model_targets
Beispiel #5
0
 def run(self):
     """Train the guesser, save it, and pickle its parameters.

     The time spent inside ``train`` is stored under the 'training_time'
     key of the object returned by ``parameters()`` before that object is
     pickled to 'guesser_params.pickle'.
     """
     guesser_cls = get_class(self.guesser_module, self.guesser_class)
     guesser = guesser_cls()  # type: AbstractGuesser
     dataset = guesser.qb_dataset()

     # Only the train call (including fetching the training data) is timed.
     train_began = time.time()
     guesser.train(dataset.training_data())
     training_seconds = time.time() - train_began

     model_dir = AbstractGuesser.output_path(self.guesser_module,
                                             self.guesser_class, '')
     guesser.save(model_dir)

     params = guesser.parameters()
     params['training_time'] = training_seconds

     params_file = AbstractGuesser.output_path(self.guesser_module,
                                               self.guesser_class,
                                               'guesser_params.pickle')
     with open(params_file, 'wb') as handle:
         pickle.dump(params, handle)
Beispiel #6
0
 def run(self):
     """Train the guesser for ``self.config_num`` and persist model and parameters.

     After training, the guesser is saved to its output path and its
     parameters, extended with a "training_time" entry holding the
     duration of the ``train`` call, are pickled to
     "guesser_params.pickle".
     """
     # Leading arguments shared by both output_path calls below.
     location = (self.guesser_module, self.guesser_class, self.config_num)

     guesser_class = get_class(self.guesser_module, self.guesser_class)
     guesser_instance = guesser_class(
         self.config_num)  # type: AbstractGuesser
     qb_dataset = guesser_instance.qb_dataset()

     started_at = time.time()
     guesser_instance.train(qb_dataset.training_data())
     finished_at = time.time()

     guesser_instance.save(AbstractGuesser.output_path(*location, ""))

     params = guesser_instance.parameters()
     params["training_time"] = finished_at - started_at

     params_path = AbstractGuesser.output_path(
         *location, "guesser_params.pickle")
     with open(params_path, "wb") as out_file:
         pickle.dump(params, out_file)
Beispiel #7
0
    def run(self):
        """Generate guesses for ``self.fold`` using a word skip and save them.

        The guesser is loaded from its output directory. ``word_skip`` is
        read from the 'guesser_word_skip' config entry for the train/dev
        guesser folds and from 'buzzer_word_skip' for any other fold.
        """
        guesser_class = get_class(self.guesser_module, self.guesser_class)
        guesser_directory = AbstractGuesser.output_path(
            self.guesser_module, self.guesser_class, '')
        guesser_instance = guesser_class.load(
            guesser_directory)  # type: AbstractGuesser

        uses_guesser_setting = self.fold in {
            c.GUESSER_TRAIN_FOLD, c.GUESSER_DEV_FOLD}
        word_skip = (conf['guesser_word_skip'] if uses_guesser_setting
                     else conf['buzzer_word_skip'])

        start_message = (
            'Generating and saving guesses for {} fold with word_skip={}...'.
            format(self.fold, word_skip))
        log.info(start_message)

        began = time.time()
        guesses = guesser_instance.generate_guesses(self.n_guesses,
                                                    [self.fold],
                                                    word_skip=word_skip)
        elapsed = time.time() - began

        log.info('Guessing on {} fold took {}s, saving guesses...'.format(
            self.fold, elapsed))
        guesser_class.save_guesses(guesses, guesser_directory, [self.fold])
        log.info('Done saving guesses')
Beispiel #8
0
 def requires(self):
     """Yield ``DownloadData()`` and, when configured, one extra dependency.

     The extra dependency is only yielded when both
     ``self.dependency_class`` and ``self.dependency_module`` are set; it
     is resolved through ``get_class`` and instantiated without arguments.
     """
     yield DownloadData()

     # Nothing more to yield unless both halves of the dependency are given.
     if self.dependency_class is None or self.dependency_module is None:
         return

     extra_dependency = get_class(self.dependency_module,
                                  self.dependency_class)
     yield extra_dependency()
Beispiel #9
0
 def run(self):
     """Create the report for ``self.fold`` in the guesser's output directory.

     A fresh guesser is constructed from ``self.config_num`` and its
     ``create_report`` method is called with the output directory and fold.
     """
     guesser_cls = get_class(self.guesser_module, self.guesser_class)
     report_dir = AbstractGuesser.output_path(
         self.guesser_module, self.guesser_class, self.config_num, '')
     guesser_cls(self.config_num).create_report(report_dir, self.fold)