Beispiel #1
0
def initial_evaluation(model_logit, questions_all, problems_all, batch_size, print_each_problem=False):
    """Evaluate the model before training and print sanity-check statistics.

    The dataset built from ``questions_all`` and ``problems_all`` is evaluated
    three ways: in batches of ``batch_size``, in batches of size 1, and batch
    by batch (size 1) so that per-batch losses and logits can be aggregated.
    All results are printed; nothing is returned.

    Args:
        model_logit: Model exposing ``evaluate(dataset, return_dict=True)`` and
            callable as ``model_logit(batch, training=False)``. The call result
            must have a ``flat_values`` attribute (presumably a ragged tensor
            of question logits -- confirm against the model definition).
        questions_all: Questions passed to
            ``datasets.questions.individual.dict_to_dataset``.
        problems_all: Problems passed to
            ``datasets.questions.individual.dict_to_dataset``.
        batch_size: Batch size used for the first overall evaluation.
        print_each_problem: If true, print the evaluation and call results of
            every single batch and disable the progress bar.
    """
    print('Initial evaluation...')
    ds_individual = datasets.questions.individual.dict_to_dataset(questions_all, problems_all)
    print(f'Evaluating with batch size {batch_size}...')
    eval_res = model_logit.evaluate(datasets.questions.batch.batch(ds_individual, batch_size), return_dict=True)
    print(f'Evaluation result with batch size {batch_size}: {eval_res}')
    ds_batches = datasets.questions.batch.batch(ds_individual, 1)
    print('Evaluating with batch size 1...')
    eval_res = model_logit.evaluate(ds_batches, return_dict=True)
    print(f'Evaluation result with batch size 1: {eval_res}')
    batch_sizes = []
    batch_losses = []
    logit_lists = []
    for batch in tqdm(ds_batches, disable=print_each_problem):
        problem_names = [py_str(p) for p in batch['problems']]
        eval_res = model_logit.evaluate(tf.data.Dataset.from_tensors(batch), return_dict=True, verbose=0)
        call_res = model_logit(batch, training=False)
        if print_each_problem:
            print(f'{problem_names}: {eval_res}, {call_res}')
        batch_sizes.append(len(problem_names))
        batch_losses.append(eval_res['loss'])
        logit_lists.append(call_res.flat_values)
    print('Weighted average loss (expected overall loss): ', np.average(batch_losses, weights=batch_sizes))
    print('Mean of batch losses: ', np.mean(batch_losses))
    print('Median of batch losses (expected: 0.69): ', np.median(batch_losses))
    logits = tf.concat(logit_lists, axis=0)
    print('Mean of question logits (expected: 0): ', np.mean(logits))
    print('Mean of question accuracies (expected: 0.5): ', np.mean(logits.numpy() > 0))
    # Per-batch accuracy: the fraction of logits in the batch that are positive.
    accs = [np.count_nonzero(logit.numpy() > 0) / len(logit) for logit in logit_lists]
    # Weight each batch by its number of problems, exactly like the weighted
    # loss above, so that the value matches its "weighted average" label.
    print('Weighted average accuracy (expected overall accuracy): ', np.average(accs, weights=batch_sizes))
Beispiel #2
0
 def is_sufficiently_small_py(problem):
     """Tell whether ``problem`` fits within the configured node limit.

     The limit is ``cfg.gcn.max_problem_nodes[k]``; ``None`` means no limit.
     Otherwise the problem passes only if its ``graph_nodes`` entry in
     ``graphs_df`` is not missing and does not exceed the limit.
     """
     node_limit = cfg.gcn.max_problem_nodes[k]
     if node_limit is None:
         # No limit configured: every problem is accepted.
         return True
     node_count = graphs_df['graph_nodes'][py_str(problem)]
     # A missing (NA) node count is treated as "not sufficiently small".
     return bool(pd.notna(node_count) and node_count <= node_limit)
Beispiel #3
0
 def graphify(self, problem, cache=True):
     """Convert a problem into a graph and record statistics about the attempt.

     The problem is first checked against the node lower bound, then
     clausified, and finally the clausification result is turned into a graph.

     Args:
         problem: Problem identifier; normalized with ``py_str``.
         cache: Forwarded to ``self.clausifier.clausify`` and stored in the
             record as ``'clausify_cached'``.

     Returns:
         A pair ``(graph, record)``. ``graph`` is ``None`` if the lower bound
         exceeds ``self.max_number_of_nodes``, if clausification fails, or if
         building the graph raises ``FormulaVisitor.NumNodesError``. ``record``
         is a dict of statistics whose ``'error'`` entry is ``None`` on success
         and ``'nodes_from_tptp_header'``, ``'clausify'`` or ``'node_count'``
         on the respective failure.
     """
     # TODO: Time the whole call (all inclusive).
     problem = py_str(problem)
     logging.debug(f'Graphifying problem {problem}...')
     record = {'problem': problem, 'error': None, 'clausify_cached': cache}

     # Stage 1: reject early if the lower bound already exceeds the limit.
     lower_bound = self.nodes_lower_bound(problem)
     node_limit = self.max_number_of_nodes
     if node_limit is not None and lower_bound > node_limit:
         record.update({
             'error': 'nodes_from_tptp_header',
             'graph_nodes_lower_bound': lower_bound
         })
         return None, record

     # Stage 2: clausify the problem and record how long it took.
     with timer() as clausify_timer:
         clausify_result = self.clausifier.clausify(problem, cache=cache)
     record['clausify_returncode'] = clausify_result.returncode
     record['clausify_time'] = clausify_timer.elapsed
     record['clausify_time_original'] = clausify_result.time_elapsed
     if (clausify_result.returncode != 0
             or clausify_result.clauses is None
             or clausify_result.symbols is None):
         logging.debug(f'Failed to graphify problem {problem}: clausification failed.')
         record['error'] = 'clausify'
         return None, record

     # Stage 3: record the number of clauses and of symbols of each type.
     symbol_counts = {
         f'num_{symbol_type}': len(clausify_result.symbols_of_type(symbol_type))
         for symbol_type in ('predicate', 'function')
     }
     record['num_clauses'] = len(clausify_result.clauses)
     record.update(symbol_counts)

     # Stage 4: build the graph; the timer covers the failure path as well.
     graph = None
     with timer() as graph_timer:
         try:
             graph = self.clausify_result_to_graph(clausify_result)
         except FormulaVisitor.NumNodesError as e:
             # The graph would be too large (too many nodes).
             logging.debug(f'Failed to graphify problem {problem}.', exc_info=True)
             record.update({
                 'error': 'node_count',
                 'graph_nodes_lower_bound': e.actual
             })
     record['graph_time'] = graph_timer.elapsed
     if graph is None:
         return graph, record

     # Stage 5: record the size of the finished graph.
     total_nodes = graph.num_nodes()
     record['graph_nodes'] = total_nodes
     record['graph_nodes_lower_bound'] = total_nodes
     for ntype in graph.ntypes:
         record[f'graph_nodes_{ntype}'] = graph.num_nodes(ntype)
     record['graph_edges'] = sum(map(graph.num_edges, graph.canonical_etypes))
     logging.debug(f'Problem {problem} graphified.')
     return graph, record
Beispiel #4
0
def get_datasets_split(patterns, validation_split, max_problems=None):
    """Load problems matching ``patterns`` and split them into two datasets.

    Args:
        patterns: Passed to ``get_dataset`` to obtain the problem dataset.
        validation_split: Fraction of the problems, between 0 and 1 inclusive,
            that goes to the validation set.
        max_problems: If not ``None``, only this many leading problems are used.

    Returns:
        A dict with keys ``'validation'`` and ``'train'``. The validation
        dataset takes the leading problems and the training dataset takes the
        remaining ones.
    """
    problems_all = get_dataset(patterns)
    logging.info('Number of problems available: %d', problems_all.cardinality())
    leading_problems = list(map(py_str, problems_all.take(10)))
    logging.debug('Leading 10 problems: %s', leading_problems)
    if max_problems is not None:
        problems_all = problems_all.take(max_problems)
    logging.info('Number of problems taken: %d', problems_all.cardinality())
    assert 0 <= validation_split <= 1
    # Size of the validation set: the cardinality scaled by the split
    # fraction, cast back to an integer count.
    total_count = tf.cast(problems_all.cardinality(), tf.float32)
    problems_validation_count = tf.cast(total_count * validation_split, tf.int64)
    validation_problems = problems_all.take(problems_validation_count)
    train_problems = problems_all.skip(problems_validation_count)
    problems = {'validation': validation_problems, 'train': train_problems}
    for k, subset in problems.items():
        logging.info(f'Number of {k} problems: %d', subset.cardinality())
    return problems
Beispiel #5
0
 def gen():
     """Yield a dict with the question tensor for every problem that has questions.

     Iterates over ``problems`` and looks each one up in ``questions``.
     Problems without an entry in ``questions`` are skipped. Each yielded item
     is ``{'problem': problem, 'questions': q}``, where ``q`` is the question
     matrix converted to a tensor of type ``dtype`` and, if ``normalize`` is
     set, scaled by ``2 / (n * (n + 1))`` with ``n`` the number of symbols.
     """
     for problem in problems:
         # Guard only the lookup: a KeyError here means the problem has no
         # questions. Any KeyError raised further down is a real error and
         # must not be silently swallowed.
         try:
             problem_questions = questions[py_str(problem)]
         except KeyError:
             continue
         q = tf.constant(problem_questions, dtype=dtype)
         # Let n be the number of symbols in the problem.
         n_symbols = tf.shape(q)[1]
         n_symbols = tf.cast(n_symbols, q.dtype)
         # Every entry of the question matrix must lie within [-n, n].
         tf.debugging.assert_greater_equal(n_symbols, tf.reduce_max(q))
         tf.debugging.assert_less_equal(-n_symbols, tf.reduce_min(q))
         if normalize:
             # We scale the question matrix by a factor that makes questions from problems of various sizes commensurable.
             # We set the factor so that if symbol cost is 1 for all symbols, then precedence cost is 1 for all precedences.
             q *= 2 / (n_symbols * (n_symbols + 1))
             # After scaling, every entry must lie within [-2 / (n + 1), 2 / (n + 1)].
             tf.debugging.assert_greater_equal(2 / (n_symbols + 1), tf.reduce_max(q))
             tf.debugging.assert_less_equal(-2 / (n_symbols + 1), tf.reduce_min(q))
         yield {'problem': problem, 'questions': q}
Beispiel #6
0
 def _predict_one(self, problem):
     """Look up the stored costs of a single problem.

     Returns a pair ``(costs, valid)``. If ``self.costs`` has an entry for the
     problem, that entry is returned with ``True``. Otherwise the result of
     ``self.invalid_costs()`` is returned with ``False``.
     """
     key = py_str(problem)
     try:
         known_costs = self.costs[key]
     except KeyError:
         # No costs stored for this problem: fall back to the placeholder.
         return self.invalid_costs(), False
     return known_costs, True