def execute_feed_forward(head, tail, plotspectrum=True, runneptune=True, use_max=False):
    """Train and evaluate the dense feed-forward network on one Ising data file.

    Loads the samples stored in ``tail``, optionally plots the energy
    spectrum, trains the model returned by ``feed_forward`` and evaluates it
    on the test split. When ``runneptune`` is true, the plot, the metrics, the
    data file and the saved weights are sent to a Neptune experiment.

    Args:
        head: Not used by this function.
        tail: Path handed to ``IsingData.load_json``; also uploaded as an
            artifact and logged as ``file-name``.
        plotspectrum: Plot the energy spectrum before training and a row of
            three sample predictions after training.
        runneptune: Create a Neptune experiment and log results to it.
        use_max: Return the best ``val_acc`` from the training history instead
            of the test accuracy.

    Returns:
        ``(loss, max_acc)`` when ``use_max`` is true, otherwise
        ``(loss, acc)``; ``loss`` and ``acc`` come from ``model.evaluate`` on
        the test split.
    """
    neptune.init("OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()
    ttsg = IsingData(train_ratio=5)
    ttsg.load_json(tail)
    if runneptune:
        exp = neptune.create_experiment(
            name=f"DFFN on {ttsg.size}x{ttsg.size} on file {tail}",
            params=PARAMS)
    if plotspectrum:
        e_overlap = ttsg.plot_energy_spectrum(20, "energy_spectrum.png")
        # The magnetization spectrum is not plotted (call disabled below), so
        # there is neither an image nor an overlap value to report for it.
        # Sending them used to fail with NameError / FileNotFoundError.
        # m_overlap = ttsg.plot_magnetization_spectrum(20, "magnetization_spectrum.png")
        if runneptune:
            energy_spectrum_img = Image.open("energy_spectrum.png")
            exp.send_image("energy-spectrum", energy_spectrum_img)
            exp.send_metric("energy-overlap", e_overlap)

    (train_images, train_labels), (test_images, test_labels), (val_image, val_data) = ttsg.get_data()

    if PARAMS["randomize_spins"]:
        # Negate each sample with probability one half (one uniform draw per sample).
        train_images = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in train_images])
        test_images = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in test_images])
        val_image = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in val_image])

    # Affine rescale x -> (x + 1) / 2.
    # NOTE(review): presumably maps spins in {-1, +1} to {0, 1} - confirm against IsingData.
    train_images = (train_images + 1) / 2
    test_images = (test_images + 1) / 2
    val_image = (val_image + 1) / 2

    callback = callbacks.TensorBoard(log_dir=f"logs\\ffn\\{datetime.now().strftime('%Y%m%d-%H%M%S')}")
    model, hist_dict = feed_forward(train_images, train_labels, val_image, val_data, callback, ttsg.size)

    if plotspectrum:
        pred_label = model.predict(test_images[:3])
        plot_row_with_prediction(test_images[:3], test_labels[:3], pred_label)

    max_acc = max(hist_dict["val_acc"])
    loss, acc = model.evaluate(test_images, test_labels)
    print(f"Model Accuracy on test set:{acc}")

    if runneptune:
        exp.send_artifact(tail)
        exp.send_text("test-accuracy", str(acc))
        exp.send_metric("max_acc", max_acc)
        exp.send_text("test-loss", str(loss))
        exp.send_text("file-name", tail)
        # Save the trained weights locally, then attach the file to the experiment.
        name = f"FFN_weights {datetime.now().strftime('%Y_%m_%d %H_%M')}.h5"
        model.save_weights(name)
        exp.send_artifact(name)
        exp.stop()

    if use_max:
        return loss, max_acc
    return loss, acc
def main(argv):
    """Run the actor, learner or visualizer selected by ``FLAGS.run_mode``.

    The learner mode first initialises Neptune, creates an experiment tagged
    with ``FLAGS.nonce`` and enables the TensorBoard integration.

    Args:
        argv: Command-line arguments; more than one entry is rejected.

    Raises:
        app.UsageError: If ``argv`` has more than one element.
        ValueError: If ``FLAGS.run_mode`` is not a supported mode.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    mode = FLAGS.run_mode

    if mode == 'actor':
        actor.actor_loop(env.create_environment)
        return

    if mode == 'learner':
        neptune.init('do-not-be-hasty/matrace')
        neptune.create_experiment(tags=[FLAGS.nonce])
        neptune_tensorboard.integrate_with_tensorflow()
        learner.learner_loop(env.create_environment,
                             create_agent,
                             create_optimizer)
        return

    if mode == 'visualize':
        visualize.visualize(env.create_environment,
                            create_agent,
                            create_optimizer)
        return

    raise ValueError('Unsupported run mode {}'.format(FLAGS.run_mode))
def run_neptune(head, tail):
    """Train the convolutional network on one Ising data file inside a Neptune experiment.

    Loads ``tail`` through ``IsingData``, reshapes every split to
    ``(samples, size, size, 1)``, builds the model with
    ``get_convolutional_network``, trains it, evaluates it on the test split
    and uploads the accuracy, loss and saved weights to the experiment.

    Args:
        head: Not used by this function.
        tail: Path handed to ``IsingData.load_json``; also part of the
            experiment name.

    Returns:
        The test accuracy reported by ``model.evaluate``.
    """
    neptune.init(project_qualified_name="OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()
    ttf = IsingData(train_ratio=1, test_ratio=0.5, validation_ratio=0.20)
    ttf.load_json(tail)
    (train_image, train_label), (test_image, test_label), (val_image, val_label) = ttf.get_data()

    # Reshape each split to (samples, size, size, 1); no value scaling happens here.
    train_image = train_image.reshape((len(train_image), ttf.size, ttf.size, 1))
    test_image = test_image.reshape((len(test_image), ttf.size, ttf.size, 1))
    val_image = val_image.reshape((len(val_image), ttf.size, ttf.size, 1))

    exp_name = f"Convolutional {tail} {datetime.now().strftime('%Y_%m_%d')}"
    with neptune.create_experiment(name=exp_name, params=PARAMS) as exp:
        logdir = "..\\logs\\fit\\" + datetime.now().strftime("%Y%m%d-%H%M%S")
        callback = TensorBoard(log_dir=logdir)  # Make sure to save callback as a regular variable

        # Fetch the experiment parameters once instead of once per use.
        params = exp.get_parameters()
        model = get_convolutional_network(ttf.size, params['periodic_padding'])
        model.compile(optimizer=params['optimizer'],
                      loss=params['loss'],
                      # The metrics parameter is read back as a string; parse it into a list.
                      metrics=ast.literal_eval(params['metrics']))
        model.fit(train_image, train_label,
                  epochs=PARAMS['epochs'],
                  validation_data=(val_image, val_label),
                  callbacks=[callback],
                  batch_size=PARAMS['batch_size'])

        # summary() prints by itself and returns None, so it must not be wrapped in print().
        model.summary()

        loss, acc = model.evaluate(test_image, test_label)
        print(f"Model accuracy: {acc}")
        exp.send_text("test-accuracy", str(acc))
        exp.send_text("test-loss", str(loss))

        weights_name = f"convolutional_weights {datetime.now().strftime('%Y_%m_%d %H_%M')}.h5"
        model.save_weights(weights_name)
        exp.send_artifact(weights_name)
    return acc
def feed_forward_residual(head, tail):
    """Train a small residual feed-forward classifier on one Ising data file.

    The network flattens the input, applies 20-unit dense layers joined by two
    additive skip connections, then dropout and a single sigmoid output. It is
    trained for 50 epochs with SGD inside a Neptune experiment.

    Args:
        head: Not used by this function.
        tail: Path handed to ``IsingData.load_json``.

    Returns:
        ``(loss, acc)`` from ``model.evaluate`` on the test split.
    """
    neptune.init("OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()

    dataset = IsingData(train_ratio=5)
    dataset.load_json(tail)
    (train_data, train_labels), (test_data, test_labels), (val_data, val_labels) = dataset.get_data()

    if PARAMS["randomize_spins"]:
        def randomly_negate(samples):
            # One uniform draw per sample; a draw above 0.5 negates that sample.
            return np.array([s * -1 if np.random.uniform(0, 1) > 0.5 else s for s in samples])

        # Same order as the draws were always made: train, test, validation.
        train_data = randomly_negate(train_data)
        test_data = randomly_negate(test_data)
        val_data = randomly_negate(val_data)

    with neptune.create_experiment(name="Residual feed forward"):
        tb_callback = callbacks.TensorBoard(
            log_dir=f"logs\\ffn\\{datetime.now().strftime('%Y%m%d-%H%M%S')}")

        # Layers are created in a fixed order so their auto-generated names stay stable.
        net_input = Input(shape=(dataset.size, dataset.size,))
        flat = layers.Flatten()(net_input)
        hidden_1 = layers.Dense(20, activation="relu")(flat)
        hidden_2 = layers.Dense(20, activation="relu")(hidden_1)
        # First skip connection: linear projection of hidden_1 added to hidden_2.
        skip_1 = layers.Dense(20)(hidden_1)
        merged_1 = layers.add([skip_1, hidden_2])
        hidden_3 = layers.Dense(20, activation="relu")(merged_1)
        # Second skip connection around hidden_3.
        skip_2 = layers.Dense(20)(merged_1)
        merged_2 = layers.add([hidden_3, skip_2])
        regularised = layers.Dropout(0.3)(merged_2)
        net_output = layers.Dense(1, activation="sigmoid")(regularised)

        model = models.Model(inputs=net_input, outputs=net_output)
        model.compile(optimizer="sgd",
                      loss="binary_crossentropy",
                      metrics=["accuracy"])
        model.fit(train_data, train_labels,
                  validation_data=(val_data, val_labels),
                  epochs=50,
                  callbacks=[tb_callback])
        loss, acc = model.evaluate(test_data, test_labels)
        return loss, acc
def main(argv):
    """Run the actor, learner or visualizer selected by ``FLAGS.run_mode``.

    Unless ``FLAGS.is_local`` is set, the actor and learner modes first call
    ``get_configuration`` with ``FLAGS.mrunner_config``.

    Args:
        argv: Command-line arguments; more than one entry is rejected.

    Raises:
        app.UsageError: If ``argv`` has more than one element.
        ValueError: If ``FLAGS.run_mode`` is not a supported mode.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')
    if FLAGS.run_mode == 'actor':
        if not FLAGS.is_local:
            get_configuration(config_file=FLAGS.mrunner_config, inject_parameters_to_FLAGS=True)
        actor.actor_loop(env.create_environment)
    elif FLAGS.run_mode == 'learner':
        if not FLAGS.is_local:
            get_configuration(config_file=FLAGS.mrunner_config, print_diagnostics=True, with_neptune=True, inject_parameters_to_FLAGS=True)
            # NOTE(review): the three Neptune calls below are assumed to belong
            # to the non-local branch (the experiment is requested through
            # with_neptune=True above) - confirm against the original layout.
            experiment = neptune.get_experiment()
            # Tag the experiment with the run's nonce.
            experiment.append_tag(tag=FLAGS.nonce)
            neptune_tensorboard.integrate_with_tensorflow()
        learner.learner_loop(env.create_environment, create_agent, create_optimizer)
    elif FLAGS.run_mode == 'visualize':
        visualize.visualize(env.create_environment, create_agent, create_optimizer)
    else:
        raise ValueError('Unsupported run mode {}'.format(FLAGS.run_mode))
#model.compile(optimizer=optimizer, loss=cust_loss(custLossThresh), metrics = ['accuracy',mae,dice_coef], run_eagerly=True) model.compile(optimizer=optimizer, loss=cust_loss(custLossThresh), metrics = ['mae',cust_accuracy(custLossThresh),cust_mae(custLossThresh),dice_coef], run_eagerly=True) # # set up callback functions # tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_path, checkpoint_filename), monitor='loss', verbose=1, save_best_only=True, mode='min') # # Adam optimizer adaptively computes updates to the learning rate # so scheduler is taken out for this optimizer # LRS = tf.keras.callbacks.LearningRateScheduler(scheduler) # # run the model # neptune_tb.integrate_with_tensorflow() model.fit(train_dataset, epochs=epochs, validation_data=val_dataset, callbacks=[tensorboard, checkpoint, NeptuneMonitor()]) # # Send signal to neptune that the run is done # neptune.stop() # # save the model from the last epoch # t = time.strftime("%Y_%m_%d_%H_%M", time.localtime()) model.save(output_model.format(t))
# Step 1: Initialize Neptune import neptune neptune.init(api_token='ANONYMOUS', project_qualified_name='shared/tensorboard-integration') # Step 2: Create an experiment neptune.create_experiment('tensorboard-logging') # Step 3: Run ``neptune_tensorboard.integrate_with_tensorflow()`` import neptune_tensorboard neptune_tensorboard.integrate_with_tensorflow() # Step 4: Add your training code import tensorflow as tf import datetime mnist = tf.keras.datasets.mnist (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train, x_test = x_train / 255.0, x_test / 255.0 def create_model(): return tf.keras.models.Sequential([ tf.keras.layers.Flatten(input_shape=(28, 28)),
def main(cfg: DictConfig) -> None:
    """Run the question-based training pipeline described by ``cfg``.

    In order, the function:

    1. configures TensorFlow threading, logging, seeding and (optionally) a
       Neptune experiment, and records run metadata;
    2. collects the problems and splits them into ``'val'`` and ``'train'``
       datasets;
    3. loads or generates the questions and plots their distribution;
    4. graphifies the problems and, for the ``'gcn'`` symbol cost model, drops
       problems whose graphs exceed the configured node limit;
    5. builds the callbacks and one logit model per symbol type, optionally
       restoring checkpoints;
    6. optionally evaluates baselines, the symbol cost model and the logit
       model, and finally fits the first logit model.

    Args:
        cfg: Configuration object; the attributes read are visible in the
            body (``cfg.tf``, ``cfg.neptune``, ``cfg.problems``,
            ``cfg.questions``, ``cfg.gcn``, ``cfg.solver_eval``, ...).

    Raises:
        RuntimeError: If a generator loaded from disk was created with
            different problems, randomization, background or metric than the
            current configuration requests.
    """
    tf.config.threading.set_inter_op_parallelism_threads(cfg.tf.threads.inter)
    tf.config.threading.set_intra_op_parallelism_threads(cfg.tf.threads.intra)

    # `import dgl` initializes TensorFlow context. The parallelism needs to be configured before the context is
    # initialized. For this reason importing the modules that transitively import `dgl` is delayed.
    from questions.graphifier import Graphifier
    from questions import models

    logging.basicConfig(level=cfg.log_level)
    logging.getLogger('matplotlib').setLevel(logging.INFO)

    tf.random.set_seed(0)
    tf.config.run_functions_eagerly(cfg.tf.run_eagerly)
    tf.summary.experimental.set_step(0)
    if cfg.recursion_limit is not None:
        sys.setrecursionlimit(cfg.recursion_limit)

    # Neptune
    # NOTE(review): the neptune.set_property calls further down are not guarded by cfg.neptune.enabled - confirm
    # that running with Neptune disabled is supported.
    if cfg.neptune.enabled:
        neptune.init(project_qualified_name=cfg.neptune.project_qualified_name)
        neptune.create_experiment(params=flatten_config(cfg), logger=logging.getLogger(),
                                  upload_source_files=map(hydra.utils.to_absolute_path,
                                                          cfg.neptune.experiment.upload_source_files),
                                  **{k: v for k, v in OmegaConf.to_container(cfg.neptune.experiment).items() if
                                     k != 'upload_source_files'})
        neptune_tensorboard.integrate_with_tensorflow(prefix=True)

    logging.info(f'Working directory: {os.getcwd()}')
    neptune.set_property('cwd', os.getcwd())
    neptune.set_property('original_cwd', hydra.utils.get_original_cwd())
    neptune.set_property('cwd_relpath', os.path.relpath(os.getcwd(), hydra.utils.get_original_cwd()))

    logging.info('Python recursion limit: %d', sys.getrecursionlimit())
    neptune.set_property('recursion_limit', sys.getrecursionlimit())
    logging.info('TensorFlow inter-op parallelism threads: %d', tf.config.threading.get_inter_op_parallelism_threads())
    logging.info('TensorFlow intra-op parallelism threads: %d', tf.config.threading.get_intra_op_parallelism_threads())
    logging.info('TensorFlow physical devices: %s', tf.config.experimental.list_physical_devices())
    neptune.set_property('tf.physical_devices', tf.config.experimental.list_physical_devices())

    logging.info(f'Joblib cache location: {memory.location}')
    neptune.set_property('joblib.cache.location', memory.location)

    writer_train = tf.summary.create_file_writer('train')
    with writer_train.as_default():
        # https://stackoverflow.com/a/61106106/4054250
        args_series = pd.Series(cfg.__dict__, name='value')
        args_series.index.name = 'argument'
        tf.summary.text('args', args_series.to_markdown())
        tf.summary.text('command', ' '.join(sys.argv))
        logging.info('Command: %s', ' '.join(sys.argv))
        neptune.set_property('command', ' '.join(sys.argv))
        tf.summary.text('hostname', socket.gethostname())
        logging.info(f'Hostname: {socket.gethostname()}')

    patterns = cfg.problems.patterns
    if cfg.problems.pattern_list is not None:
        with open(hydra.utils.to_absolute_path(cfg.problems.pattern_list)) as f:
            patterns += list(l.rstrip() for l in f)

    def normalize_pattern(pattern):
        if re.match(
                r'^(?P<name>(?P<domain>[A-Z]{3})(?P<number>[0-9]{3})(?P<form>[-+^=_])(?P<version>[1-9])(?P<size_parameters>[0-9]*(\.[0-9]{3})*))$',
                pattern):
            # The pattern is a problem name without a file extension.
            # Append the file extension '.p'.
            pattern = f'{pattern}.p'
        m = re.match(
            r'^(?P<name>(?P<domain>[A-Z]{3})(?P<number>[0-9]{3})(?P<form>[-+^=_])(?P<version>[1-9])(?P<size_parameters>[0-9]*(\.[0-9]{3})*))(?:\.[pg])$',
            pattern)
        if m:
            # The pattern is a problem base name without domain directory name.
            # Prepend the domain directory name.
            pattern = os.path.join(m['domain'], pattern)
        return pattern

    patterns = list(map(normalize_pattern, patterns))

    clausifier = Solver(**OmegaConf.to_container(cfg.clausifier))
    solver = Solver(**OmegaConf.to_container(cfg.solver))

    # `parallel` is handed to the solver evaluation callback, so everything up to and including training stays
    # inside this context.
    with joblib.parallel_backend('threading', n_jobs=cfg.jobs), joblib.Parallel(verbose=10) as parallel:
        # Collect problem datasets
        # We need to split problems first and then collect questions for each of the datasets
        # because not all problems have questions and we only generate questions samples
        # for problems with at least one question.
        if cfg.problems.train is not None and cfg.problems.val is not None:
            problems = {
                'val': tf.data.TextLineDataset(hydra.utils.to_absolute_path(cfg.problems.val)),
                'train': tf.data.TextLineDataset(hydra.utils.to_absolute_path(cfg.problems.train))
            }
            problems_all = problems['val'].concatenate(problems['train'])
        else:
            logging.info('Collecting available problems...')
            if cfg.problems.names is None:
                problems_all = datasets.problems.get_dataset(patterns)
            else:
                problems_all = tf.data.TextLineDataset(cfg.problems.names)
            save_problems(problems_all, os.path.join('problems', 'all.txt'))
            if cfg.problems.max_count is not None:
                problems_all = problems_all.take(cfg.problems.max_count)
            save_problems(problems_all, os.path.join('problems', 'taken.txt'))
            n_problems = cardinality_finite(problems_all)
            logging.info('Number of problems available: %d', n_problems)
            assert 0 <= cfg.validation_split <= 1
            # The first `problems_validation_count` problems form the validation split; the rest are for training.
            problems_validation_count = tf.cast(tf.round(tf.cast(n_problems, tf.float32) * cfg.validation_split),
                                                tf.int64)
            assert problems_validation_count >= 0
            problems = {
                'val': problems_all.take(problems_validation_count),
                'train': problems_all.skip(problems_validation_count)
            }
        logging.info('Number of problems taken: %d', cardinality_finite(problems_all))
        neptune.set_property('problems/taken', cardinality_finite(problems_all))

        # One record per problem; the `dataset_<split>` flags are switched on below.
        problem_records = {p: {**tptp.problem_properties(p), **{f'dataset_{k}': False for k in problems}} for p in
                           map(py_str, problems_all)}
        # Builtin `bool` replaces the `np.bool` alias, which was removed in NumPy 1.24.
        problem_records_types = {**tptp.property_types, **{f'dataset_{k}': bool for k in problems}}
        for k, p in problems.items():
            logging.info(f'Number of {k} problems: {cardinality_finite(p)}')
            neptune.set_property(f'problems/taken/{k}', cardinality_finite(p))
            save_problems(p, os.path.join('problems', 'dataset', f'{k}.txt'))
            for pp in map(py_str, p):
                problem_records[pp][f'dataset_{k}'] = True

        # Generate questions
        # The image summary at the end of this section needs a default writer, so the section stays in this context.
        with writer_train.as_default():
            if cfg.questions.dir_legacy is None:
                questions_dir = cfg.questions.dir
                if questions_dir is None:
                    questions_dir = 'questions'
                else:
                    questions_dir = hydra.utils.to_absolute_path(questions_dir)
                try:
                    generator = Generator.load(questions_dir)
                    logging.info('Generator loaded.')
                    # A stored generator is only reused when it matches the current configuration.
                    if any(l != r for l, r in itertools.zip_longest(generator.problems, map(py_str, problems_all))):
                        raise RuntimeError('Loaded generator uses different problems.')
                    if set(generator.randomize) != set(cfg.questions.randomize):
                        raise RuntimeError(
                            f'Loaded generator randomizes different symbol type. Expected: {cfg.questions.randomize}. Actual: {generator.randomize}.')
                    if generator.background != cfg.questions.background:
                        raise RuntimeError(
                            f'Loaded generator uses a different background. Expected: {cfg.questions.background}. Actual: {generator.background}.')
                    if generator.metric != cfg.questions.metric:
                        raise RuntimeError(
                            f'Loaded generator uses a different metric. Expected: {cfg.questions.metric}. Actual: {generator.metric}.')
                except FileNotFoundError:
                    generator = Generator.fresh(list(map(py_str, problems_all)), clausifier,
                                                randomize=cfg.questions.randomize,
                                                hoeffding_exponent=cfg.questions.hoeffding_exponent,
                                                background=cfg.questions.background,
                                                metric=cfg.questions.metric)
                    logging.info('Starting generating questions from scratch.')
                with writer_train.as_default():
                    questions_all = generator.generate(solver,
                                                       num_questions_per_batch=cfg.questions.batch_size,
                                                       num_questions_per_problem=cfg.questions.max_per_problem,
                                                       dir=questions_dir,
                                                       num_questions=cfg.questions.max_count)
            else:
                # TODO?: Only load questions if the batches are not cached.
                questions_file = os.path.join(hydra.utils.to_absolute_path('cache'),
                                              f'symbol_type_{cfg.symbol_types[0]}',
                                              f'max_questions_per_problem_{cfg.questions.max_per_problem}',
                                              'questions.pkl')
                # Here we load the raw, un-normalized questions (oriented element-wise differences of inverse precedences).
                questions_all = datasets.questions.load_questions.load(questions_file, cfg.questions.dir_legacy,
                                                                       cfg.questions.max_per_problem)

            neptune.set_property('problems/with_questions', len(questions_all))

            question_counts = [q.shape[0] for q in questions_all.values()]
            signature_lengths = [q.shape[1] for q in questions_all.values()]

            # The statistics are informational only; a ValueError from describe() is deliberately ignored.
            try:
                print(f'Question counts: {scipy.stats.describe(question_counts)}')
            except ValueError:
                pass
            try:
                print(f'Signature sizes: {scipy.stats.describe(signature_lengths)}')
            except ValueError:
                pass

            df_index = pd.Index(questions_all.keys(), name='name')
            df = pd.DataFrame({
                'n_questions': pd.Series(question_counts, index=df_index, dtype=pd.UInt32Dtype(), name='n_questions'),
                'n_symbols': pd.Series(signature_lengths, index=df_index, dtype=pd.UInt32Dtype(), name='n_symbols')
            }, index=df_index)
            save_df(df, os.path.join('problems', 'with_questions'))

            figure = plt.figure(figsize=(8, 8))
            plt.title('Problems with questions')
            sns.scatterplot(x=signature_lengths, y=question_counts)
            plt.xlabel('Symbols')
            plt.ylabel('Questions')
            plt.xscale('log')
            plt.yscale('log')
            plt.savefig(os.path.join('problems', 'with_questions.png'))
            image = plot.plot_to_image(figure)
            tf.summary.image('Problems with questions', image)

        for k, v in problem_records.items():
            if k in questions_all:
                v['num_questions'] = questions_all[k].shape[0]
                v['num_symbols'] = questions_all[k].shape[1]
            else:
                v['num_questions'] = 0
        problem_records_types.update({'num_questions': pd.UInt32Dtype(), 'num_symbols': pd.UInt32Dtype()})

        # Graphify problems
        # The graphifier gets the largest configured per-split node limit (None if no split sets one).
        max_num_nodes = None
        for k in problems:
            if cfg.gcn.max_problem_nodes[k] is not None:
                if max_num_nodes is None:
                    max_num_nodes = cfg.gcn.max_problem_nodes[k]
                else:
                    max_num_nodes = max(max_num_nodes, cfg.gcn.max_problem_nodes[k])
        graphifier = Graphifier(clausifier, max_number_of_nodes=max_num_nodes)
        graphs, graphs_df = graphifier.get_graphs_dict(OrderedSet(map(py_str, problems_all)))

        # Per-problem column sums of the clause features, joined onto the graph statistics.
        clause_types = {name: tf.reduce_sum(tf.cast(graph.ndata['feat']['clause'], tf.uint32), axis=0).numpy() for
                        name, graph in graphs.items()}
        columns = ['AXIOM', 'ASSUMPTION', 'CONJECTURE', 'NEGATED_CONJECTURE', 'CLAIM', 'EXTENSIONALITY_AXIOM',
                   'MODEL_DEFINITION']
        columns = [('clause_type', c) for c in columns]
        dtypes = {c: pd.UInt32Dtype() for c in columns}
        df_clause_types = dataframe_from_records(clause_types, columns=columns, dtypes=dtypes)
        graphs_df = graphs_df.join(df_clause_types, rsuffix='_clause_type')

        for symbol_type in ('predicate', 'function'):
            features = {name: tf.reduce_sum(tf.cast(graph.ndata['feat'][symbol_type], tf.uint32), axis=0).numpy() for
                        name, graph in graphs.items()}
            columns = graphifier.symbol_feature_columns
            columns = [(symbol_type, c) for c in columns]
            df_features = dataframe_from_records(features, columns=columns,
                                                 dtypes={c: pd.UInt32Dtype() for c in columns})
            graphs_df = graphs_df.join(df_features)

        for problem_name, rec in graphs_df.iterrows():
            problem_records[problem_name].update(rec.to_dict())

        logging.info(f'Number of problems graphified: {len(graphs)}')
        neptune.set_property('problems/graphified', len(graphs))
        save_df(graphs_df, 'graphs')

        if cfg.symbol_cost.model == 'gcn':
            # Drop problems that have too large graphs
            for k, v in problems.items():
                num_before = cardinality_finite(v)
                # The filtered dataset is evaluated lazily, long after this loop has moved on and `k` has been
                # rebound. Bind this split's limit now so that the predicate never reads another split's limit.
                max_nodes = cfg.gcn.max_problem_nodes[k]

                def is_sufficiently_small_py(problem, max_nodes=max_nodes):
                    if max_nodes is None:
                        return True
                    num_nodes = graphs_df['graph_nodes'][py_str(problem)]
                    if pd.notna(num_nodes) and num_nodes <= max_nodes:
                        return True
                    return False

                def is_sufficiently_small_tf(problem):
                    return tf.py_function(is_sufficiently_small_py, [problem], tf.bool)

                problems[k] = v.filter(is_sufficiently_small_tf)
                num_after = cardinality_finite(problems[k])
                logging.info(
                    f'{k}: {num_after}/{num_before} problems kept because their size is at most {cfg.gcn.max_problem_nodes[k]}.')

        # Build the per-split question datasets and their batched versions.
        questions = {}
        question_batches = {}
        problems_with_questions = {}
        for k, p in problems.items():
            q = datasets.questions.individual.dict_to_dataset(questions_all, p,
                                                              normalize=cfg.questions.normalize).cache()
            if dataset_is_empty(q):
                warnings.warn(f'Dataset \'{k}\' is empty.')
            questions[k] = q
            batch_size = {'train': cfg.batch_size.train, 'val': cfg.batch_size.val}[k]
            question_batches[k] = datasets.questions.batch.batch(q, batch_size).cache()
            problems_with_questions[k] = [pp for pp in map(py_str, p) if pp in questions_all]
            logging.info(f'Number of {k} problems with questions: {len(problems_with_questions[k])}')
            neptune.set_property(f'problems/with_questions/{k}', len(problems_with_questions[k]))

        # Prepare the checkpoint directories and remove weight files left there by earlier runs.
        checkpoint_dir = 'tf_ckpts'
        epoch_ckpt_dir = os.path.join(checkpoint_dir, 'epoch')
        os.makedirs(epoch_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(epoch_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)
        acc_ckpt_dir = os.path.join(checkpoint_dir, 'val_binary_accuracy')
        os.makedirs(acc_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(acc_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)
        success_ckpt_dir = os.path.join(checkpoint_dir, 'val_solver_success_rate')
        os.makedirs(success_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(success_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)

        tensorboard = callbacks.TensorBoard(log_dir='.', profile_batch=cfg.tb.profile_batch, histogram_freq=1,
                                            embeddings_freq=1)
        cbs = [
            tensorboard,
            callbacks.Time(
                problems={k: next(iter(v.take(32).batch(32))) for k, v in problems.items() if not dataset_is_empty(v)},
                tensorboard=tensorboard),
            tf.keras.callbacks.CSVLogger('epochs.csv'),
            tf.keras.callbacks.ModelCheckpoint(
                os.path.join(epoch_ckpt_dir, 'weights.{epoch:05d}.tf'),
                save_weights_only=True, verbose=0),
            tf.keras.callbacks.ModelCheckpoint(
                os.path.join(acc_ckpt_dir, 'weights.{epoch:05d}-{val_binary_accuracy:.2f}.tf'),
                save_weights_only=True, verbose=1, monitor='val_binary_accuracy', save_best_only=True),
            tf.keras.callbacks.EarlyStopping(**cfg.early_stopping),
            tf.keras.callbacks.ReduceLROnPlateau(**cfg.reduce_lr_on_plateau)
        ]

        # Problems for solver evaluation: validation problems first, then (optionally truncated) training problems.
        solver_eval_problems = None
        if cfg.solver_eval.start is not None or cfg.solver_eval.step is not None:
            solver_eval_problems = problems['val']
            if cfg.solver_eval.problems.val is not None and cfg.solver_eval.problems.val >= 0:
                solver_eval_problems = solver_eval_problems.take(cfg.solver_eval.problems.val)
            if cfg.solver_eval.train_without_questions:
                solver_eval_problems_train = problems['train']
            else:
                solver_eval_problems_train = tf.data.Dataset.from_tensor_slices(problems_with_questions['train'])
            if cfg.solver_eval.problems.train is not None and cfg.solver_eval.problems.train >= 0:
                solver_eval_problems_train = solver_eval_problems_train.take(cfg.solver_eval.problems.train)
            if not dataset_is_empty(solver_eval_problems_train):
                solver_eval_problems = solver_eval_problems.concatenate(solver_eval_problems_train)
            solver_eval_problems = list(OrderedSet(map(py_str, solver_eval_problems)))

        save_df(dataframe_from_records(list(problem_records.values()), index_keys='name', dtypes=problem_records_types),
                'problems')

        logit_models = {}
        for symbol_type in cfg.symbol_types:
            model_logit = get_model_logit(cfg, questions_all, clausifier, cbs, tensorboard, graphifier, symbol_type)
            logit_models[symbol_type] = model_logit
            if symbol_type in cfg.restore_checkpoint:
                filename = cfg.restore_checkpoint[symbol_type]
                if filename is None:
                    continue
                model_logit.load_weights(hydra.utils.to_absolute_path(filename))
                logging.info(f'Checkpoint restored: {hydra.utils.to_absolute_path(filename)}')

        # Only the model of the first symbol type is evaluated and trained below.
        model_logit = next(iter(logit_models.values()))
        model_symbol_cost = model_logit.symbol_cost_model

        # We need to set_model before we begin using tensorboard. Tensorboard is used in other callbacks in symbol cost evaluation.
        tensorboard.set_model(model_logit)

        if solver_eval_problems is not None:
            problem_categories = {
                'all': None,
                'with_questions': questions_all.keys(),
                'graphified': graphs.keys(),
                'with_questions&graphified': OrderedSet(questions_all.keys()) & graphs.keys()
            }
            for cat_name, cat_filename in cfg.solver_eval.problem_set:
                with open(cat_filename) as f:
                    problem_categories[cat_name] = [l.rstrip('\n') for l in f]
            symbol_cost_evaluation_callback = callbacks.SymbolCostEvaluation(
                cfg.solver_eval,
                'epochs_solver_eval.csv',
                solver=solver,
                problems=solver_eval_problems,
                splits={k: list(map(py_str, v)) for k, v in problems.items()},
                symbol_type=cfg.symbol_types[0],
                tensorboard=tensorboard,
                problem_categories=problem_categories,
                baseline=cfg.symbol_cost.model == 'baseline',
                parallel=parallel)
            cbs.append(symbol_cost_evaluation_callback)

            for name, d in cfg.solver_eval.baselines.items():
                df = pd.read_pickle(hydra.utils.to_absolute_path(d.filename))
                logs = symbol_cost_evaluation_callback.evaluate_dataframe(df, name, d.iterations)
                print(f'Baseline \'{name}\':\n{yaml.dump(logs)}')

            if symbol_cost_evaluation_callback.start <= -1:
                print(f'Initial evaluation of the symbol cost model...')
                sc_models = {k: v.symbol_cost_model for k, v in logit_models.items()}
                symbol_cost_evaluation_callback.evaluate(sc_models, epoch=-1)

        # The baseline symbol cost model is neither evaluated on questions nor trained.
        if not isinstance(model_symbol_cost, models.symbol_cost.Baseline):
            if cfg.initial_eval:
                for k in question_batches:
                    print(f'Initial evaluation of the logit model on {k} questions...')
                    if k == 'train':
                        # Training questions are re-batched with the validation batch size for this evaluation.
                        x = datasets.questions.batch.batch(questions[k], cfg.batch_size.val)
                    else:
                        x = question_batches[k]
                    metrics = model_logit.evaluate(x, return_dict=True)
                    print(f'Initial evaluation on {k} set: {metrics}')

            if cfg.initial_evaluation_extra:
                initial_evaluation(model_logit, questions_all, problems_all, cfg.batch_size.train)

            if cfg.epochs >= 1:
                print('Training...')
                model_logit.fit(question_batches['train'], validation_data=question_batches['val'], epochs=cfg.epochs,
                                callbacks=cbs)