def save(self, path: RichPath) -> None:
    """Serialize this model to *path* as a compressed file.

    The saved payload contains the model's class name, hyperparameters,
    metadata, run name, and the current values of all TF global variables
    (keyed by variable name).
    """
    # Deduplicate the global-variable collection before fetching values;
    # zip() below relies on fetching in the same order as this list.
    global_vars = list(set(self.__sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)))
    fetched_values = self.__sess.run(global_vars)
    weights = {var.name: value for var, value in zip(global_vars, fetched_values)}

    path.save_as_compressed_file({
        "model_type": type(self).__name__,
        "hyperparameters": self.hyperparameters,
        "metadata": self.__metadata,
        "weights": weights,
        "run_name": self.__run_name,
    })
def run_experiment(queries: List[Dict[str, Any]],
                   best_index: int,
                   n_trials: int,
                   dfs: Dict[str, pd.DataFrame],
                   dataset_params: Dict[str, Any],
                   optimizer_params: Dict[str, Any],
                   output_file: RichPath):
    """Run the query optimizer for *n_trials* trials and persist metrics.

    Each trial asks the optimizer for a query, times the resulting frame
    merge, and feeds the negative elapsed time back as the reward. The
    collected metrics (per-trial times, running averages, fraction of
    best-query selections, and the chosen indices) are written to
    *output_file* as a compressed file.
    """
    times: List[float] = []
    avg_times: List[float] = []
    best_frac: List[float] = []
    selected_index: List[int] = []
    total_time = 0.0
    best_count = 0.0

    optimizer = get_optimizer(optimizer_params['name'], queries,
                              **optimizer_params['params'])

    for trial in range(1, n_trials + 1):
        start = time.time()
        query_index, q = optimizer.get_query(time=trial)
        df = merge_frames(q['from'], q['on'], options=[['home', 'away']])
        elapsed = time.time() - start

        # Negative elapsed time: faster queries earn higher reward.
        optimizer.update(query_index, reward=-elapsed)
        df.copy()

        selected_index.append(query_index)

        # Skip the first trial's measurements — caching makes them
        # unrepresentative.
        if trial > 1:
            times.append(elapsed)
            total_time += elapsed
            avg_times.append(total_time / (trial - 1))
            best_count += float(query_index == best_index)
            best_frac.append(best_count / (trial - 1))

        if trial % 100 == 0:
            print(f'Completed {trial} trials')

    # Collect and write the metrics
    metrics = {
        'times': times,
        'avg_times': avg_times,
        'best_frac': best_frac,
        'selected_index': selected_index,
    }
    output_file.save_as_compressed_file(metrics)
def run_predict(model_path: RichPath, test_data_path: RichPath, output_file: RichPath):
    """Restore a trained model, annotate the test set, and save predictions.

    For every non-ignored annotation the top-10 predicted annotations (by
    log-probability) are kept; unknown-token predictions are mapped to
    ``typing.Any``. Results stream lazily into *output_file* as a
    compressed file.
    """
    test_run_id = "_".join([time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])
    test_hyper_overrides = {
        'run_id': test_run_id,
        # Disable dropout at inference time.
        "dropout_keep_rate": 1.0,
    }

    test_data_chunks = test_data_path.get_filtered_files_in_dir('*.jsonl.gz')

    # Restore model
    model = model_restore_helper.restore(model_path,
                                         is_train=False,
                                         hyper_overrides=test_hyper_overrides)

    def predictions():
        for annotation in model.annotate(test_data_chunks):
            if ignore_annotation(annotation.original_annotation):
                continue

            logprob_dist = annotation.predicted_annotation_logprob_dist
            # Top 10 predictions, ordered by descending log-probability.
            top_predictions = sorted(logprob_dist,
                                     key=lambda a: -logprob_dist[a])[:10]

            record = annotation._asdict()
            filtered_logprobs = []
            for predicted in top_predictions:
                logprob = float(logprob_dist[predicted])
                # Normalize unknown-token predictions to typing.Any.
                if predicted in ('%UNK%', '%UNKNOWN%'):
                    predicted = 'typing.Any'
                filtered_logprobs.append((predicted, logprob))
            record['predicted_annotation_logprob_dist'] = filtered_logprobs
            yield record

    output_file.save_as_compressed_file(predictions())