Example #1
0
def execute_feed_forward(head, tail, plotspectrum=True, runneptune=True, use_max=False):
    """Train and evaluate the feed-forward network on one Ising data file.

    Args:
        head: Unused; kept so existing callers do not break.
        tail: Data file passed to ``IsingData.load_json``. When Neptune
            logging is enabled it is also uploaded as an artifact.
        plotspectrum: When true, plot the energy spectrum before training
            and plot a row of three test predictions after training.
        runneptune: When true, create a Neptune experiment and send
            images, metrics, the data file and the saved weights to it.
        use_max: When true, return the best validation accuracy seen
            during training instead of the test-set accuracy.

    Returns:
        ``(loss, max_acc)`` if ``use_max`` is true, otherwise
        ``(loss, acc)``, where ``loss`` and ``acc`` come from
        ``model.evaluate`` on the test split.
    """
    neptune.init("OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()
    ttsg = IsingData(train_ratio=5)
    ttsg.load_json(tail)

    # Bound up front so the name always exists; only used when runneptune is true.
    exp = None
    if runneptune:
        exp = neptune.create_experiment(name=f"DFFN on {ttsg.size}x{ttsg.size} on file {tail}", params=PARAMS)

    if plotspectrum:
        e_overlap = ttsg.plot_energy_spectrum(20, "energy_spectrum.png")
        # NOTE: the magnetization spectrum is not computed here, so nothing
        # magnetization-related is uploaded. The previous code still tried to
        # open "magnetization_spectrum.png" and to send an undefined
        # `m_overlap`, which failed whenever plotting and Neptune logging
        # were both enabled (the default).
        if runneptune:
            energy_spectrum_img = Image.open("energy_spectrum.png")
            exp.send_image("energy-spectrum", energy_spectrum_img)
            exp.send_metric("energy-overlap", e_overlap)

    (train_images, train_labels), (test_images, test_labels), (val_image, val_data) = ttsg.get_data()

    if PARAMS["randomize_spins"]:
        # Negate each sample with probability 1/2.
        train_images = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in train_images])
        test_images = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in test_images])
        val_image = np.array([t * -1 if np.random.uniform(0, 1) > 0.5 else t for t in val_image])

    # Affine rescale x -> (x + 1) / 2 (maps -1/+1 to 0/1).
    train_images = (train_images + 1) / 2
    test_images = (test_images + 1) / 2
    val_image = (val_image + 1) / 2

    callback = callbacks.TensorBoard(log_dir=f"logs\\ffn\\{datetime.now().strftime('%Y%m%d-%H%M%S')}")
    model, hist_dict = feed_forward(train_images, train_labels, val_image, val_data, callback, ttsg.size)

    if plotspectrum:
        pred_label = model.predict(test_images[:3])
        plot_row_with_prediction(test_images[:3], test_labels[:3], pred_label)
    max_acc = max(hist_dict["val_acc"])

    loss, acc = model.evaluate(test_images, test_labels)

    print(f"Model Accuracy on test set:{acc}")
    if runneptune:
        exp.send_artifact(tail)
        exp.send_text("test-accuracy", str(acc))
        exp.send_metric("max_acc", max_acc)
        exp.send_text("test-loss", str(loss))
        exp.send_text("file-name", tail)
        name = f"FFN_weights {datetime.now().strftime('%Y_%m_%d %H_%M')}.h5"
        model.save_weights(name)
        exp.send_artifact(name)
        exp.stop()

    if use_max:
        return loss, max_acc
    return loss, acc
Example #2
0
def main(argv):
    """Dispatch to the actor, learner or visualizer according to ``FLAGS.run_mode``.

    Args:
        argv: Command-line arguments; more than one entry is rejected.

    Raises:
        app.UsageError: If ``argv`` has more than one element.
        ValueError: If ``FLAGS.run_mode`` is not one of ``'actor'``,
            ``'learner'`` or ``'visualize'``.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    mode = FLAGS.run_mode

    if mode == 'actor':
        actor.actor_loop(env.create_environment)
        return

    if mode == 'learner':
        # Neptune is set up before the learner loop starts; the experiment
        # is tagged with the run's nonce.
        neptune.init('do-not-be-hasty/matrace')
        neptune.create_experiment(tags=[FLAGS.nonce])
        neptune_tensorboard.integrate_with_tensorflow()
        learner.learner_loop(env.create_environment, create_agent, create_optimizer)
        return

    if mode == 'visualize':
        visualize.visualize(env.create_environment, create_agent, create_optimizer)
        return

    raise ValueError('Unsupported run mode {}'.format(mode))
def run_neptune(head, tail):
    """Train the convolutional network on one Ising data file, logging to Neptune.

    Args:
        head: Unused; kept so existing callers do not break.
        tail: Data file passed to ``IsingData.load_json``; also embedded in
            the experiment name.

    Returns:
        The second value returned by ``model.evaluate`` on the test split
        (bound to ``acc`` here).
    """
    neptune.init(project_qualified_name="OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()
    ttf = IsingData(train_ratio=1, test_ratio=0.5, validation_ratio=0.20)
    ttf.load_json(tail)
    (train_image, train_label), (test_image, test_label), (val_image, val_label) = ttf.get_data()

    # Reshape every split to (n_samples, size, size, 1), adding a trailing
    # axis of length one. No value normalisation is applied here.
    sample_shape = (ttf.size, ttf.size, 1)
    train_image = train_image.reshape((len(train_image),) + sample_shape)
    test_image = test_image.reshape((len(test_image),) + sample_shape)
    val_image = val_image.reshape((len(val_image),) + sample_shape)

    exp_name = f"Convolutional {tail} {datetime.now().strftime('%Y_%m_%d')}"
    with neptune.create_experiment(name=exp_name, params=PARAMS) as exp:
        logdir = "..\\logs\\fit\\" + datetime.now().strftime("%Y%m%d-%H%M%S")
        callback = TensorBoard(log_dir=logdir)  # Make sure to save callback as a regular variable

        # Fetch the experiment parameters once instead of once per lookup.
        exp_params = exp.get_parameters()
        model = get_convolutional_network(ttf.size, exp_params['periodic_padding'])
        model.compile(optimizer=exp_params['optimizer'],
                      loss=exp_params['loss'],
                      # 'metrics' is read back as a string and parsed into a Python literal.
                      metrics=ast.literal_eval(exp_params['metrics']))

        model.fit(train_image,
                  train_label,
                  epochs=PARAMS['epochs'],
                  validation_data=(val_image, val_label),
                  callbacks=[callback],
                  batch_size=PARAMS['batch_size'])

        # summary() prints the table itself and returns None, so it is not
        # wrapped in print() (that emitted a stray "None" line).
        model.summary()

        loss, acc = model.evaluate(test_image, test_label)
        print(f"Model accuracy: {acc}")
        exp.send_text("test-accuracy", str(acc))
        exp.send_text("test-loss", str(loss))

        weights_name = f"convolutional_weights {datetime.now().strftime('%Y_%m_%d %H_%M')}.h5"
        model.save_weights(weights_name)
        exp.send_artifact(weights_name)
    return acc
Example #4
0
def feed_forward_residual(head, tail):
    """Train and evaluate a small residual feed-forward classifier on Ising data.

    The network flattens the input, applies 20-unit dense layers with two
    additive skip connections, then dropout and a single sigmoid output.

    Args:
        head: Unused; kept so existing callers do not break.
        tail: Data file passed to ``IsingData.load_json``.

    Returns:
        ``(loss, acc)`` as returned by ``model.evaluate`` on the test split.
    """

    def flip_at_random(samples):
        # Negate each sample with probability 1/2.
        return np.array([s * -1 if np.random.uniform(0, 1) > 0.5 else s for s in samples])

    neptune.init("OneOneFour/Ising-Model")
    neptune_tb.integrate_with_tensorflow()

    ising_data = IsingData(train_ratio=5)
    ising_data.load_json(tail)

    (train_data, train_labels), (test_data, test_labels), (val_data, val_labels) = ising_data.get_data()

    if PARAMS["randomize_spins"]:
        # Same split order as before (train, test, val) so the random
        # stream is consumed identically.
        train_data = flip_at_random(train_data)
        test_data = flip_at_random(test_data)
        val_data = flip_at_random(val_data)

    with neptune.create_experiment(name="Residual feed forward"):
        tb_callback = callbacks.TensorBoard(log_dir=f"logs\\ffn\\{datetime.now().strftime('%Y%m%d-%H%M%S')}")

        # `inputs` rather than `input`, so the builtin is not shadowed.
        inputs = Input(shape=(ising_data.size, ising_data.size,))
        flatten = layers.Flatten()(inputs)

        # First residual stage: a linear projection of `first` is added to `second`.
        first = layers.Dense(20, activation="relu")(flatten)
        second = layers.Dense(20, activation="relu")(first)
        transformation = layers.Dense(20)(first)
        first_add = layers.add([transformation, second])

        # Second residual stage, built on the output of the first.
        third = layers.Dense(20, activation="relu")(first_add)
        second_transformation = layers.Dense(20)(first_add)
        second_add = layers.add([third, second_transformation])

        dropout = layers.Dropout(0.3)(second_add)
        fourth = layers.Dense(1, activation="sigmoid")(dropout)
        model = models.Model(inputs=inputs, outputs=fourth)

        model.compile(optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"])
        model.fit(train_data, train_labels, validation_data=(val_data, val_labels), epochs=50,
                  callbacks=[tb_callback])

        loss, acc = model.evaluate(test_data, test_labels)

        return loss, acc
def main(argv):
    """Run the actor, learner or visualizer selected by ``FLAGS.run_mode``.

    For non-local actor and learner runs the configuration is first loaded
    via ``get_configuration``; the learner additionally tags the current
    Neptune experiment with ``FLAGS.nonce`` and enables the TensorBoard
    integration.

    Args:
        argv: Command-line arguments; more than one entry is rejected.

    Raises:
        app.UsageError: If ``argv`` has more than one element.
        ValueError: If ``FLAGS.run_mode`` is not a supported mode.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    mode = FLAGS.run_mode

    if mode == 'actor':
        if not FLAGS.is_local:
            get_configuration(config_file=FLAGS.mrunner_config,
                              inject_parameters_to_FLAGS=True)
        actor.actor_loop(env.create_environment)
        return

    if mode == 'learner':
        if not FLAGS.is_local:
            get_configuration(config_file=FLAGS.mrunner_config,
                              print_diagnostics=True,
                              with_neptune=True,
                              inject_parameters_to_FLAGS=True)
            current_experiment = neptune.get_experiment()
            current_experiment.append_tag(tag=FLAGS.nonce)
            neptune_tensorboard.integrate_with_tensorflow()
        learner.learner_loop(env.create_environment, create_agent, create_optimizer)
        return

    if mode == 'visualize':
        visualize.visualize(env.create_environment, create_agent, create_optimizer)
        return

    raise ValueError('Unsupported run mode {}'.format(mode))
# Previous compile call, kept for reference (plain 'accuracy' metric):
#model.compile(optimizer=optimizer, loss=cust_loss(custLossThresh), metrics = ['accuracy',mae,dice_coef], run_eagerly=True)
# Compile with the custom loss and the threshold-parameterised custom metrics.
model.compile(optimizer=optimizer, loss=cust_loss(custLossThresh), metrics = ['mae',cust_accuracy(custLossThresh),cust_mae(custLossThresh),dice_coef], run_eagerly=True)
#
# Set up callback functions:
#   - TensorBoard logging to `log_dir`, with histograms every epoch
#   - a checkpoint that is written only when the training 'loss' improves
#     (monitor='loss', mode='min', save_best_only=True)
#
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_path, checkpoint_filename), monitor='loss', verbose=1, save_best_only=True, mode='min')

#
# Adam optimizer adaptively computes updates to the learning rate,
# so the scheduler is taken out for this optimizer.
# NOTE: LRS is still constructed here but is not included in the
# `callbacks` list passed to model.fit below.
#
LRS = tf.keras.callbacks.LearningRateScheduler(scheduler)

#
# Run the model: enable the Neptune/TensorBoard integration, then fit with
# the TensorBoard, checkpoint and NeptuneMonitor callbacks.
#
neptune_tb.integrate_with_tensorflow()
model.fit(train_dataset, epochs=epochs, validation_data=val_dataset, callbacks=[tensorboard, checkpoint, NeptuneMonitor()])

#
# Send signal to neptune that the run is done
#
neptune.stop()

#
# Save the model from the last epoch; the local-time timestamp
# (YYYY_MM_DD_HH_MM) is substituted into the `output_model` format string.
#
t = time.strftime("%Y_%m_%d_%H_%M", time.localtime())
model.save(output_model.format(t))
# Step 1: Initialize Neptune
# Uses the 'ANONYMOUS' API token with the 'shared/tensorboard-integration' project.

import neptune

neptune.init(api_token='ANONYMOUS',
             project_qualified_name='shared/tensorboard-integration')

# Step 2: Create an experiment

neptune.create_experiment('tensorboard-logging')

# Step 3: Run ``neptune_tensorboard.integrate_with_tensorflow()``
# This is called after the experiment has been created and before any
# training code runs.

import neptune_tensorboard

neptune_tensorboard.integrate_with_tensorflow()

# Step 4: Add your training code

import tensorflow as tf
import datetime

mnist = tf.keras.datasets.mnist

# Load the MNIST train/test splits and divide the images by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


def create_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
Example #8
0
def main(cfg: DictConfig) -> None:
    tf.config.threading.set_inter_op_parallelism_threads(cfg.tf.threads.inter)
    tf.config.threading.set_intra_op_parallelism_threads(cfg.tf.threads.intra)

    # `import dgl` initializes TensorFlow context. The parallelism needs to be configured before the context is initialized. For this reason importing the modules that transitively import `dgl` is delayed.
    from questions.graphifier import Graphifier
    from questions import models

    logging.basicConfig(level=cfg.log_level)
    logging.getLogger('matplotlib').setLevel(logging.INFO)
    tf.random.set_seed(0)
    tf.config.run_functions_eagerly(cfg.tf.run_eagerly)
    tf.summary.experimental.set_step(0)
    if cfg.recursion_limit is not None:
        sys.setrecursionlimit(cfg.recursion_limit)

    # Neptune
    if cfg.neptune.enabled:
        neptune.init(project_qualified_name=cfg.neptune.project_qualified_name)
        neptune.create_experiment(params=flatten_config(cfg), logger=logging.getLogger(),
                                  upload_source_files=map(hydra.utils.to_absolute_path,
                                                          cfg.neptune.experiment.upload_source_files),
                                  **{k: v for k, v in OmegaConf.to_container(cfg.neptune.experiment).items() if
                                     k != 'upload_source_files'})
        neptune_tensorboard.integrate_with_tensorflow(prefix=True)

    logging.info(f'Working directory: {os.getcwd()}')
    neptune.set_property('cwd', os.getcwd())
    neptune.set_property('original_cwd', hydra.utils.get_original_cwd())
    neptune.set_property('cwd_relpath', os.path.relpath(os.getcwd(), hydra.utils.get_original_cwd()))

    logging.info('Python recursion limit: %d', sys.getrecursionlimit())
    neptune.set_property('recursion_limit', sys.getrecursionlimit())
    logging.info('TensorFlow inter-op parallelism threads: %d', tf.config.threading.get_inter_op_parallelism_threads())
    logging.info('TensorFlow intra-op parallelism threads: %d', tf.config.threading.get_intra_op_parallelism_threads())
    logging.info('TensorFlow physical devices: %s', tf.config.experimental.list_physical_devices())
    neptune.set_property('tf.physical_devices', tf.config.experimental.list_physical_devices())

    logging.info(f'Joblib cache location: {memory.location}')
    neptune.set_property('joblib.cache.location', memory.location)

    writer_train = tf.summary.create_file_writer('train')
    with writer_train.as_default():
        # https://stackoverflow.com/a/61106106/4054250
        args_series = pd.Series(cfg.__dict__, name='value')
        args_series.index.name = 'argument'
        tf.summary.text('args', args_series.to_markdown())
        tf.summary.text('command', ' '.join(sys.argv))
        logging.info('Command: %s', ' '.join(sys.argv))
        neptune.set_property('command', ' '.join(sys.argv))
        tf.summary.text('hostname', socket.gethostname())
        logging.info(f'Hostname: {socket.gethostname()}')

    patterns = cfg.problems.patterns
    if cfg.problems.pattern_list is not None:
        with open(hydra.utils.to_absolute_path(cfg.problems.pattern_list)) as f:
            patterns += list(l.rstrip() for l in f)

    def normalize_pattern(pattern):
        """Expand a bare problem name into ``<domain>/<name>.<ext>``.

        A bare problem name (e.g. ``ABC123-1``) first gets the ``.p``
        extension appended. A problem file name ending in ``.p`` or ``.g``
        with no directory part is then prefixed with its three-letter
        domain directory. Anything else is returned unchanged.
        """
        bare_name_regex = r'^(?P<name>(?P<domain>[A-Z]{3})(?P<number>[0-9]{3})(?P<form>[-+^=_])(?P<version>[1-9])(?P<size_parameters>[0-9]*(\.[0-9]{3})*))$'
        file_name_regex = r'^(?P<name>(?P<domain>[A-Z]{3})(?P<number>[0-9]{3})(?P<form>[-+^=_])(?P<version>[1-9])(?P<size_parameters>[0-9]*(\.[0-9]{3})*))(?:\.[pg])$'

        if re.match(bare_name_regex, pattern) is not None:
            # Problem name without a file extension: append '.p'.
            pattern = f'{pattern}.p'

        file_match = re.match(file_name_regex, pattern)
        if file_match is None:
            return pattern

        # Problem base name without a domain directory: prepend it.
        return os.path.join(file_match.group('domain'), pattern)

    patterns = list(map(normalize_pattern, patterns))

    clausifier = Solver(**OmegaConf.to_container(cfg.clausifier))
    solver = Solver(**OmegaConf.to_container(cfg.solver))

    with joblib.parallel_backend('threading', n_jobs=cfg.jobs), joblib.Parallel(verbose=10) as parallel:
        # Collect problem datasets
        # We need to split problems first and then collect questions for each of the datasets
        # because not all problems have questions and we only generate questions samples
        # for problems with at least one question.
        if cfg.problems.train is not None and cfg.problems.val is not None:
            problems = {
                'val': tf.data.TextLineDataset(hydra.utils.to_absolute_path(cfg.problems.val)),
                'train': tf.data.TextLineDataset(hydra.utils.to_absolute_path(cfg.problems.train))
            }
            problems_all = problems['val'].concatenate(problems['train'])
        else:
            logging.info('Collecting available problems...')
            if cfg.problems.names is None:
                problems_all = datasets.problems.get_dataset(patterns)
            else:
                problems_all = tf.data.TextLineDataset(cfg.problems.names)
            save_problems(problems_all, os.path.join('problems', 'all.txt'))
            if cfg.problems.max_count is not None:
                problems_all = problems_all.take(cfg.problems.max_count)
            save_problems(problems_all, os.path.join('problems', 'taken.txt'))
            n_problems = cardinality_finite(problems_all)
            logging.info('Number of problems available: %d', n_problems)
            assert 0 <= cfg.validation_split <= 1
            problems_validation_count = tf.cast(tf.round(tf.cast(n_problems, tf.float32) * cfg.validation_split),
                                                tf.int64)
            assert problems_validation_count >= 0
            problems = {
                'val': problems_all.take(problems_validation_count),
                'train': problems_all.skip(problems_validation_count)
            }
        logging.info('Number of problems taken: %d', cardinality_finite(problems_all))
        neptune.set_property('problems/taken', cardinality_finite(problems_all))

        problem_records = {p: {**tptp.problem_properties(p), **{f'dataset_{k}': False for k in problems}} for p in
                           map(py_str, problems_all)}
        problem_records_types = {**tptp.property_types, **{f'dataset_{k}': np.bool for k in problems}}
        for k, p in problems.items():
            logging.info(f'Number of {k} problems: {cardinality_finite(p)}')
            neptune.set_property(f'problems/taken/{k}', cardinality_finite(p))
            save_problems(p, os.path.join('problems', 'dataset', f'{k}.txt'))
            for pp in map(py_str, p):
                problem_records[pp][f'dataset_{k}'] = True

        # Generate questions
        with writer_train.as_default():
            if cfg.questions.dir_legacy is None:
                questions_dir = cfg.questions.dir
                if questions_dir is None:
                    questions_dir = 'questions'
                else:
                    questions_dir = hydra.utils.to_absolute_path(questions_dir)
                try:
                    generator = Generator.load(questions_dir)
                    logging.info('Generator loaded.')
                    if any(l != r for l, r in itertools.zip_longest(generator.problems, map(py_str, problems_all))):
                        raise RuntimeError('Loaded generator uses different problems.')
                    if set(generator.randomize) != set(cfg.questions.randomize):
                        raise RuntimeError(
                            f'Loaded generator randomizes different symbol type. Expected: {cfg.questions.randomize}. Actual: {generator.randomize}.')
                    if generator.background != cfg.questions.background:
                        raise RuntimeError(
                            f'Loaded generator uses a different background. Expected: {cfg.questions.background}. Actual: {generator.background}.')
                    if generator.metric != cfg.questions.metric:
                        raise RuntimeError(
                            f'Loaded generator uses a different metric. Expected: {cfg.questions.metric}. Actual: {generator.metric}.')
                except FileNotFoundError:
                    generator = Generator.fresh(list(map(py_str, problems_all)), clausifier,
                                                randomize=cfg.questions.randomize,
                                                hoeffding_exponent=cfg.questions.hoeffding_exponent,
                                                background=cfg.questions.background,
                                                metric=cfg.questions.metric)
                    logging.info('Starting generating questions from scratch.')
                with writer_train.as_default():
                    questions_all = generator.generate(solver,
                                                       num_questions_per_batch=cfg.questions.batch_size,
                                                       num_questions_per_problem=cfg.questions.max_per_problem,
                                                       dir=questions_dir,
                                                       num_questions=cfg.questions.max_count)
            else:
                # TODO?: Only load questions if the batches are not cached.
                questions_file = os.path.join(hydra.utils.to_absolute_path('cache'),
                                              f'symbol_type_{cfg.symbol_types[0]}',
                                              f'max_questions_per_problem_{cfg.questions.max_per_problem}',
                                              'questions.pkl')

                # Here we load the raw, un-normalized questions (oriented element-wise differences of inverse precedences).
                questions_all = datasets.questions.load_questions.load(questions_file, cfg.questions.dir_legacy,
                                                                       cfg.questions.max_per_problem)

            neptune.set_property('problems/with_questions', len(questions_all))

            question_counts = [q.shape[0] for q in questions_all.values()]
            signature_lengths = [q.shape[1] for q in questions_all.values()]

            try:
                print(f'Question counts: {scipy.stats.describe(question_counts)}')
            except ValueError:
                pass
            try:
                print(f'Signature sizes: {scipy.stats.describe(signature_lengths)}')
            except ValueError:
                pass

            df_index = pd.Index(questions_all.keys(), name='name')
            df = pd.DataFrame({
                'n_questions': pd.Series(question_counts, index=df_index, dtype=pd.UInt32Dtype(), name='n_questions'),
                'n_symbols': pd.Series(signature_lengths, index=df_index, dtype=pd.UInt32Dtype(), name='n_symbols')
            }, index=df_index)
            save_df(df, os.path.join('problems', 'with_questions'))

            figure = plt.figure(figsize=(8, 8))
            plt.title('Problems with questions')
            sns.scatterplot(x=signature_lengths, y=question_counts)
            plt.xlabel('Symbols')
            plt.ylabel('Questions')
            plt.xscale('log')
            plt.yscale('log')
            plt.savefig(os.path.join('problems', 'with_questions.png'))
            image = plot.plot_to_image(figure)
            tf.summary.image('Problems with questions', image)

        for k, v in problem_records.items():
            if k in questions_all:
                v['num_questions'] = questions_all[k].shape[0]
                v['num_symbols'] = questions_all[k].shape[1]
            else:
                v['num_questions'] = 0
        problem_records_types.update({'num_questions': pd.UInt32Dtype(), 'num_symbols': pd.UInt32Dtype()})

        # Graphify problems
        max_num_nodes = None
        for k in problems:
            if cfg.gcn.max_problem_nodes[k] is not None:
                if max_num_nodes is None:
                    max_num_nodes = cfg.gcn.max_problem_nodes[k]
                else:
                    max_num_nodes = max(max_num_nodes, cfg.gcn.max_problem_nodes[k])
        graphifier = Graphifier(clausifier, max_number_of_nodes=max_num_nodes)
        graphs, graphs_df = graphifier.get_graphs_dict(OrderedSet(map(py_str, problems_all)))
        clause_types = {name: tf.reduce_sum(tf.cast(graph.ndata['feat']['clause'], tf.uint32), axis=0).numpy() for
                        name, graph in graphs.items()}
        columns = ['AXIOM', 'ASSUMPTION', 'CONJECTURE', 'NEGATED_CONJECTURE', 'CLAIM', 'EXTENSIONALITY_AXIOM',
                   'MODEL_DEFINITION']
        columns = [('clause_type', c) for c in columns]
        dtypes = {c: pd.UInt32Dtype() for c in columns}
        df_clause_types = dataframe_from_records(clause_types, columns=columns, dtypes=dtypes)
        graphs_df = graphs_df.join(df_clause_types, rsuffix='_clause_type')
        for symbol_type in ('predicate', 'function'):
            features = {name: tf.reduce_sum(tf.cast(graph.ndata['feat'][symbol_type], tf.uint32), axis=0).numpy() for
                        name, graph in graphs.items()}
            columns = graphifier.symbol_feature_columns
            columns = [(symbol_type, c) for c in columns]
            df_features = dataframe_from_records(features, columns=columns,
                                                 dtypes={c: pd.UInt32Dtype() for c in columns})
            graphs_df = graphs_df.join(df_features)
        for problem_name, rec in graphs_df.iterrows():
            problem_records[problem_name].update(rec.to_dict())
        logging.info(f'Number of problems graphified: {len(graphs)}')
        neptune.set_property('problems/graphified', len(graphs))
        save_df(graphs_df, 'graphs')
        if cfg.symbol_cost.model == 'gcn':
            # Drop problems that have too large graphs
            for k, v in problems.items():
                num_before = cardinality_finite(v)

                def is_sufficiently_small_py(problem):
                    if cfg.gcn.max_problem_nodes[k] is None:
                        return True
                    num_nodes = graphs_df['graph_nodes'][py_str(problem)]
                    if pd.notna(num_nodes) and num_nodes <= cfg.gcn.max_problem_nodes[k]:
                        return True
                    return False

                def is_sufficiently_small_tf(problem):
                    return tf.py_function(is_sufficiently_small_py, [problem], tf.bool)

                problems[k] = v.filter(is_sufficiently_small_tf)
                num_after = cardinality_finite(problems[k])
                logging.info(
                    f'{k}: {num_after}/{num_before} problems kept because their size is at most {cfg.gcn.max_problem_nodes[k]}.')

        questions = {}
        question_batches = {}
        problems_with_questions = {}
        for k, p in problems.items():
            q = datasets.questions.individual.dict_to_dataset(questions_all, p,
                                                              normalize=cfg.questions.normalize).cache()
            if dataset_is_empty(q):
                warnings.warn(f'Dataset \'{k}\' is empty.')
            questions[k] = q
            batch_size = {'train': cfg.batch_size.train, 'val': cfg.batch_size.val}[k]
            question_batches[k] = datasets.questions.batch.batch(q, batch_size).cache()
            problems_with_questions[k] = [pp for pp in map(py_str, p) if pp in questions_all]
            logging.info(f'Number of {k} problems with questions: {len(problems_with_questions[k])}')
            neptune.set_property(f'problems/with_questions/{k}', len(problems_with_questions[k]))

        checkpoint_dir = 'tf_ckpts'
        epoch_ckpt_dir = os.path.join(checkpoint_dir, 'epoch')
        os.makedirs(epoch_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(epoch_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)
        acc_ckpt_dir = os.path.join(checkpoint_dir, 'val_binary_accuracy')
        os.makedirs(acc_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(acc_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)
        success_ckpt_dir = os.path.join(checkpoint_dir, 'val_solver_success_rate')
        os.makedirs(success_ckpt_dir, exist_ok=True)
        for f in glob.iglob(os.path.join(success_ckpt_dir, 'weights.*.tf.*')):
            os.remove(f)
        tensorboard = callbacks.TensorBoard(log_dir='.', profile_batch=cfg.tb.profile_batch, histogram_freq=1,
                                            embeddings_freq=1)
        cbs = [
            tensorboard,
            callbacks.Time(
                problems={k: next(iter(v.take(32).batch(32))) for k, v in problems.items() if not dataset_is_empty(v)},
                tensorboard=tensorboard),
            tf.keras.callbacks.CSVLogger('epochs.csv'),
            tf.keras.callbacks.ModelCheckpoint(
                os.path.join(epoch_ckpt_dir, 'weights.{epoch:05d}.tf'),
                save_weights_only=True, verbose=0),
            tf.keras.callbacks.ModelCheckpoint(
                os.path.join(acc_ckpt_dir, 'weights.{epoch:05d}-{val_binary_accuracy:.2f}.tf'),
                save_weights_only=True, verbose=1, monitor='val_binary_accuracy', save_best_only=True),
            tf.keras.callbacks.EarlyStopping(**cfg.early_stopping),
            tf.keras.callbacks.ReduceLROnPlateau(**cfg.reduce_lr_on_plateau)
        ]

        solver_eval_problems = None
        if cfg.solver_eval.start is not None or cfg.solver_eval.step is not None:
            solver_eval_problems = problems['val']
            if cfg.solver_eval.problems.val is not None and cfg.solver_eval.problems.val >= 0:
                solver_eval_problems = solver_eval_problems.take(cfg.solver_eval.problems.val)
            if cfg.solver_eval.train_without_questions:
                solver_eval_problems_train = problems['train']
            else:
                solver_eval_problems_train = tf.data.Dataset.from_tensor_slices(problems_with_questions['train'])
            if cfg.solver_eval.problems.train is not None and cfg.solver_eval.problems.train >= 0:
                solver_eval_problems_train = solver_eval_problems_train.take(cfg.solver_eval.problems.train)
            if not dataset_is_empty(solver_eval_problems_train):
                solver_eval_problems = solver_eval_problems.concatenate(solver_eval_problems_train)
            solver_eval_problems = list(OrderedSet(map(py_str, solver_eval_problems)))

        save_df(dataframe_from_records(list(problem_records.values()), index_keys='name', dtypes=problem_records_types),
                'problems')

        logit_models = {}
        for symbol_type in cfg.symbol_types:
            model_logit = get_model_logit(cfg, questions_all, clausifier, cbs, tensorboard, graphifier, symbol_type)
            logit_models[symbol_type] = model_logit
            if symbol_type in cfg.restore_checkpoint:
                filename = cfg.restore_checkpoint[symbol_type]
                if filename is None:
                    continue
                model_logit.load_weights(hydra.utils.to_absolute_path(filename))
                logging.info(f'Checkpoint restored: {hydra.utils.to_absolute_path(filename)}')

        model_logit = next(iter(logit_models.values()))
        model_symbol_cost = model_logit.symbol_cost_model

        # We need to set_model before we begin using tensorboard. Tensorboard is used in other callbacks in symbol cost evaluation.
        tensorboard.set_model(model_logit)

        if solver_eval_problems is not None:
            problem_categories = {
                'all': None,
                'with_questions': questions_all.keys(),
                'graphified': graphs.keys(),
                'with_questions&graphified': OrderedSet(questions_all.keys()) & graphs.keys()
            }
            for cat_name, cat_filename in cfg.solver_eval.problem_set:
                with open(cat_filename) as f:
                    problem_categories[cat_name] = [l.rstrip('\n') for l in f]

            symbol_cost_evaluation_callback = callbacks.SymbolCostEvaluation(
                cfg.solver_eval,
                'epochs_solver_eval.csv',
                solver=solver,
                problems=solver_eval_problems,
                splits={k: list(map(py_str, v)) for k, v in problems.items()},
                symbol_type=cfg.symbol_types[0],
                tensorboard=tensorboard,
                problem_categories=problem_categories,
                baseline=cfg.symbol_cost.model == 'baseline',
                parallel=parallel)
            cbs.append(symbol_cost_evaluation_callback)

            for name, d in cfg.solver_eval.baselines.items():
                df = pd.read_pickle(hydra.utils.to_absolute_path(d.filename))
                logs = symbol_cost_evaluation_callback.evaluate_dataframe(df, name, d.iterations)
                print(f'Baseline \'{name}\':\n{yaml.dump(logs)}')

            if symbol_cost_evaluation_callback.start <= -1:
                print(f'Initial evaluation of the symbol cost model...')
                sc_models = {k: v.symbol_cost_model for k, v in logit_models.items()}
                symbol_cost_evaluation_callback.evaluate(sc_models, epoch=-1)

        if not isinstance(model_symbol_cost, models.symbol_cost.Baseline):
            if cfg.initial_eval:
                for k in question_batches:
                    print(f'Initial evaluation of the logit model on {k} questions...')
                    if k == 'train':
                        x = datasets.questions.batch.batch(questions[k], cfg.batch_size.val)
                    else:
                        x = question_batches[k]
                    metrics = model_logit.evaluate(x, return_dict=True)
                    print(f'Initial evaluation on {k} set: {metrics}')

            if cfg.initial_evaluation_extra:
                initial_evaluation(model_logit, questions_all, problems_all, cfg.batch_size.train)

            if cfg.epochs >= 1:
                print('Training...')
                model_logit.fit(question_batches['train'], validation_data=question_batches['val'], epochs=cfg.epochs,
                                callbacks=cbs)