import tensorflow as tf
from dpu_utils.utils import RichPath


def save(self, path: RichPath) -> None:
    # Method of a TF-1.x model class: self.__sess holds the live tf.Session.
    # Deduplicate the global variables, then fetch their current values.
    variables_to_save = list(set(self.__sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)))
    weights_to_save = self.__sess.run(variables_to_save)
    weights_to_save = {var.name: value
                       for (var, value) in zip(variables_to_save, weights_to_save)}

    # Bundle the weights with everything needed to reconstruct the model.
    data_to_save = {
        "model_type": type(self).__name__,
        "hyperparameters": self.hyperparameters,
        "metadata": self.__metadata,
        "weights": weights_to_save,
        "run_name": self.__run_name,
    }

    path.save_as_compressed_file(data_to_save)
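
# Usage sketch (hypothetical names): `model` would be an instance of the
# class that defines save() above, and the target filename is a placeholder.
#
#     model.save(RichPath.create('trained_model.pkl.gz'))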
import time
from typing import Any, Dict, List

import pandas as pd
from dpu_utils.utils import RichPath


# get_optimizer and merge_frames are project-local helpers (not shown here).
def run_experiment(queries: List[Dict[str, Any]], best_index: int, n_trials: int,
                   dfs: Dict[str, pd.DataFrame], dataset_params: Dict[str, Any],
                   optimizer_params: Dict[str, Any], output_file: RichPath):

    # Per-trial metrics accumulated over the run.
    times: List[float] = []
    total_time: float = 0.0
    best_count: float = 0.0
    avg_times: List[float] = []
    best_frac: List[float] = []
    selected_index: List[int] = []

    # Build the query-selection strategy (e.g. a bandit) from its configuration.
    optimizer = get_optimizer(optimizer_params['name'], queries,
                              **optimizer_params['params'])
    for t in range(1, n_trials + 1):
        start = time.time()
        # Ask the optimizer which query to run, execute it, and time it.
        query_index, q = optimizer.get_query(time=t)
        df = merge_frames(q['from'], q['on'], options=[['home', 'away']])
        elapsed = time.time() - start
        # Faster queries earn a higher (less negative) reward.
        optimizer.update(query_index, reward=-elapsed)

        df.copy()  # The copy's result is discarded.

        # Skip the first trial's timing, which is skewed by cold caches.
        selected_index.append(query_index)
        if t > 1:
            times.append(elapsed)
            total_time += elapsed
            avg_times.append(total_time / (t - 1))  # t - 1 trials measured so far
            best_count += float(query_index == best_index)
            best_frac.append(best_count / (t - 1))

        if t % 100 == 0:
            print(f'Completed {t} trials')

    # Collect and write the metrics
    metrics = dict(times=times,
                   avg_times=avg_times,
                   best_frac=best_frac,
                   selected_index=selected_index)
    output_file.save_as_compressed_file(metrics)
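
# A minimal sketch of invoking run_experiment. The query structure
# ('from'/'on' keys) mirrors what merge_frames consumes above; the optimizer
# name 'epsilon_greedy' and all table/column names are illustrative
# assumptions, not part of the original code.
if __name__ == '__main__':
    example_queries = [
        {'from': ['games', 'teams'], 'on': ['team_id']},   # hypothetical
        {'from': ['teams', 'games'], 'on': ['team_id']},   # hypothetical
    ]
    run_experiment(queries=example_queries,
                   best_index=0,
                   n_trials=500,
                   dfs={},            # pre-loaded DataFrames keyed by name
                   dataset_params={},
                   optimizer_params={'name': 'epsilon_greedy',
                                     'params': {'epsilon': 0.1}},
                   output_file=RichPath.create('metrics.pkl.gz'))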
import os
import time

from dpu_utils.utils import RichPath


def run_predict(model_path: RichPath, test_data_path: RichPath,
                output_file: RichPath):
    # Unique run id: timestamp plus process id.
    test_run_id = "_".join([time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])

    # Disable dropout for inference.
    test_hyper_overrides = {
        'run_id': test_run_id,
        'dropout_keep_rate': 1.0,
    }

    # Test shards are stored as compressed JSON-lines files.
    test_data_chunks = test_data_path.get_filtered_files_in_dir('*.jsonl.gz')

    # Restore model
    model = model_restore_helper.restore(model_path,
                                         is_train=False,
                                         hyper_overrides=test_hyper_overrides)

    def predictions():
        # Lazily yield one serializable record per annotation.
        for annotation in model.annotate(test_data_chunks):
            if ignore_annotation(annotation.original_annotation):
                continue
            # Keep the ten most probable predicted annotations.
            logprob_dist = annotation.predicted_annotation_logprob_dist
            ordered_annotation_predictions = sorted(
                logprob_dist, key=lambda x: -logprob_dist[x])[:10]

            annotation_dict = annotation._asdict()
            logprobs = annotation_dict['predicted_annotation_logprob_dist']
            filtered_logprobs = []
            for annot in ordered_annotation_predictions:
                logprob = float(logprobs[annot])
                # Map unknown-type placeholders to typing.Any.
                if annot in ('%UNK%', '%UNKNOWN%'):
                    annot = 'typing.Any'
                filtered_logprobs.append((annot, logprob))
            annotation_dict['predicted_annotation_logprob_dist'] = filtered_logprobs

            yield annotation_dict

    output_file.save_as_compressed_file(predictions())
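
# Hypothetical invocation; all three paths are placeholders. The predictions
# generator above is consumed as the output file is written.
run_predict(model_path=RichPath.create('trained_model.pkl.gz'),
            test_data_path=RichPath.create('data/test'),
            output_file=RichPath.create('predictions.jsonl.gz'))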