Example 1
def clear_schema():
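    # rows are deleted child-first (Score before Model, Submission, etc.) so foreign-key constraints are not violated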
    Score.delete().execute()
    Model.delete().execute()
    Submission.delete().execute()
    BenchmarkInstance.delete().execute()
    BenchmarkType.delete().execute()
    Reference.delete().execute()
    User.delete().execute()
Example 2
 def test_run_submission(self):
     model_instances, submission = self.get_test_models()
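     # base_model presumably refers to the model module imported at the top of the original test file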
     run_submission(base_model,
                    model_instances,
                    test_benchmarks=['dicarlo.MajajHong2015.IT-pls'],
                    submission_entry=submission)
     bench_inst = BenchmarkInstance.get(
         benchmark_type_id='dicarlo.MajajHong2015.IT-pls')
     assert not isinstance(bench_inst, list)
     assert Score.get(benchmark=bench_inst)
Example 3
def test_evaluation(database, tmpdir):
    connect_db(database)
    clear_schema()
    init_user()
    working_dir = str(tmpdir.mkdir("sub"))
    config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
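    # 33 is presumably the submission id; the resulting entry in the Score table is checked below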
    run_evaluation(config_dir, working_dir, 33, database, models=['alexnet'],
                   benchmarks=['dicarlo.MajajHong2015.IT-pls'])
    scores = Score.select().dicts()
    assert len(scores) == 1
    # If comment is None the score was stored successfully; otherwise it would contain an error message
    assert scores[0]['comment'] is None
Example 4
 def test_model_failure_evaluation(self, tmpdir):
     # os.environ['RESULTCACHING_DISABLE'] = 'brainscore.score_model,model_tools'
     working_dir = str(tmpdir.mkdir('sub'))
     config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
     run_evaluation(config_dir,
                    working_dir,
                    36,
                    TestIntegration.databse,
                    models=['alexnet'],
                    benchmarks=['movshon.FreemanZiemba2013.V1-pls'])
     with open('result_36.csv') as results:
         csv_reader = csv.reader(results, delimiter=',')
         next(csv_reader)  # header row
         result_row = next(csv_reader)
         assert result_row[0] == 'alexnet'
         assert result_row[1] == 'movshon.FreemanZiemba2013.V1-pls'
         assert result_row[2] == '0'
         assert result_row[3] == '0'
     model = Model.get()
     score = Score.get(model=model)
     assert score.comment is not None  # When there's a problem, the comment field contains an error message
Example 5
 def test_evaluation(self, tmpdir):
     working_dir = str(tmpdir.mkdir('sub'))
     config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
     run_evaluation(config_dir,
                    working_dir,
                    33,
                    TestIntegration.databse,
                    models=['alexnet'],
                    benchmarks=['dicarlo.MajajHong2015.IT-pls'])
     with open('result_33.csv') as results:
         csv_reader = csv.reader(results, delimiter=',')
         next(csv_reader)  # header row
         result_row = next(csv_reader)
         assert result_row[0] == 'alexnet'
         assert result_row[1] == 'dicarlo.MajajHong2015.IT-pls'
         assert self.compare(float(result_row[2]), 0.5857491098187586)
         assert self.compare(float(result_row[3]), 0.5079816726934638)
         assert self.compare(float(result_row[4]), 0.003155449372125895)
     scores = Score.select()
     assert len(scores) == 1
     # a successful score's comment records which layers were mapped to which regions
     assert scores[0].comment.startswith("layers:")
Example 6
 def test_evaluation(self, tmpdir):
     working_dir = str(tmpdir.mkdir('sub'))
     config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
     run_evaluation(config_dir,
                    working_dir,
                    33,
                    TestIntegration.databse,
                    models=['alexnet'],
                    benchmarks=['dicarlo.MajajHong2015.IT-pls'])
     with open('result_33.csv') as results:
         csv_reader = csv.reader(results, delimiter=',')
         next(csv_reader)  # header row
         result_row = next(csv_reader)
         assert result_row[0] == 'alexnet'
         assert result_row[1] == 'dicarlo.MajajHong2015.IT-pls'
         assert self.compare(float(result_row[2]), 0.5857491098187586)
         assert self.compare(float(result_row[3]), 0.5079816726934638)
         assert self.compare(float(result_row[4]), 0.003155449372125895)
     scores = Score.select()
     assert len(scores) == 1
     # If comment is None the score was stored successfully; otherwise it would contain an error message
     assert scores[0].comment is None
Example 7
def run_submission(module, test_models, test_benchmarks, submission_entry):
    ml_brain_pool = get_ml_pool(test_models, module, submission_entry)
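    # ml_brain_pool maps each model name to a runnable brain-model instance used for scoring below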
    data = []
    success = True
    try:
        for model_entry in test_models:
            model_id = model_entry.name
            for benchmark_name in test_benchmarks:
                score_entry = None
                try:
                    start = datetime.datetime.now()
                    benchmark_entry = get_benchmark_instance(benchmark_name)
                    # Check if the model is already scored on the benchmark
                    score_entry, created = Score.get_or_create(benchmark=benchmark_entry, model=model_entry,
                                                               defaults={'start_timestamp': start, })
                    if not created and score_entry.score_raw is not None:
                        logger.warning(f'A score for model {model_id} and benchmark {benchmark_name} already exists')
                        raw = score_entry.score_raw
                        ceiled = score_entry.score_ceiled
                        error = score_entry.error
                        finished = score_entry.end_timestamp
                        comment = score_entry.comment
                    else:
                        if not created:
                            score_entry.start_timestamp = datetime.datetime.now()
                            score_entry.comment = None
                            logger.warning('An entry already exists but was not evaluated successfully; rerunning')
                        logger.info(f"Scoring {model_id}, id {model_entry.id} on benchmark {benchmark_name}")
                        model = ml_brain_pool[model_id]
                        score = score_model(model_id, benchmark_name, model)
                        logger.info(f'Running benchmark {benchmark_name} on model {model_id} (id {model_entry.id}) '
                                    f'produced this score: {score}')
                        if not hasattr(score, 'ceiling'):  # many engineering benchmarks do not have a primate ceiling
                            raw = score.sel(aggregation='center').item(0)
                            ceiled = None
                            error = None
                        else:  # score has a ceiling. Store ceiled as well as raw value
                            assert score.raw.sel(aggregation='center') is not None
                            raw = score.raw.sel(aggregation='center').item(0)
                            ceiled = score.sel(aggregation='center').item(0)
                            error = score.sel(aggregation='error').item(0)
                        finished = datetime.datetime.now()
                        comment = f"layers: {model.layer_model.region_layer_map}" \
                            if submission_entry.model_type == 'BaseModel' else ''
                        score_entry.end_timestamp = finished
                        score_entry.error = error
                        score_entry.score_ceiled = ceiled
                        score_entry.score_raw = raw
                        score_entry.comment = comment
                        score_entry.save()
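                    # collect a result row (presumably written to the result_<id>.csv files read in the tests above)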
                    result = {
                        'Model': model_id,
                        'Benchmark': benchmark_name,
                        'raw_result': raw,
                        'ceiled_result': ceiled,
                        'error': error,
                        'finished_time': finished,
                        'comment': comment,
                    }
                    data.append(result)
                except Exception as e:
                    success = False
                    error = f'Benchmark {benchmark_name} failed for model {model_id} because of this error: {e}'
                    logging.error(f'Could not run model {model_id} because of the following error')
                    logging.error(e, exc_info=True)
                    data.append({
                        'Model': model_id, 'Benchmark': benchmark_name,
                        'raw_result': 0, 'ceiled_result': 0,
                        'error': error, 'finished_time': datetime.datetime.now()
                    })
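                    # truncate the error message from the middle so it fits within SCORE_COMMENT_MAX_LENGTH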
                    if score_entry:
                        score_entry.comment = error if len(error) <= SCORE_COMMENT_MAX_LENGTH else \
                            error[:int(SCORE_COMMENT_MAX_LENGTH / 2) - 5] + ' [...] ' + \
                            error[-int(SCORE_COMMENT_MAX_LENGTH / 2) + 5:]
                        score_entry.save()
    finally:
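        # update the submission's overall status even if scoring was interrupted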
        if success:
            submission_entry.status = 'successful'
            logger.info('Submission is stored as successful')
        else:
            submission_entry.status = 'failure'
            logger.info('Submission was not entirely successful (some benchmarks could not be executed)')
        submission_entry.save()
        return data