def clear_schema():
    # wipe all tables, children before parents to respect foreign-key constraints
    Score.delete().execute()
    Model.delete().execute()
    Submission.delete().execute()
    BenchmarkInstance.delete().execute()
    BenchmarkType.delete().execute()
    Reference.delete().execute()
    User.delete().execute()
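
# For context, a minimal sketch of how clear_schema() might be wired into test setup as a
# pytest fixture, assuming the connect_db() and init_user() helpers used in the tests below.
# The fixture name and the `database` argument source are illustrative assumptions, not
# something defined in this repository.
import pytest


@pytest.fixture
def fresh_schema(database):
    connect_db(database)  # open a connection to the test database
    clear_schema()        # start every test from an empty schema
    init_user()           # recreate the test user that submissions are attributed to
    yield
    clear_schema()        # leave no rows behind for the next test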
def test_run_submission(self):
    model_instances, submission = self.get_test_models()
    run_submission(base_model, model_instances,
                   test_benchmarks=['dicarlo.MajajHong2015.IT-pls'],
                   submission_entry=submission)
    bench_inst = BenchmarkInstance.get(benchmark_type_id='dicarlo.MajajHong2015.IT-pls')
    assert not isinstance(bench_inst, list)
    assert Score.get(benchmark=bench_inst)
def test_evaluation(database, tmpdir):
    connect_db(database)
    clear_schema()
    init_user()
    working_dir = str(tmpdir.mkdir("sub"))
    config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
    run_evaluation(config_dir, working_dir, 33, database,
                   models=['alexnet'],
                   benchmarks=['dicarlo.MajajHong2015.IT-pls'])
    scores = Score.select().dicts()
    assert len(scores) == 1
    # if the comment is None the score was stored successfully; otherwise it would contain an error message
    assert scores[0]['comment'] is None
def test_model_failure_evaluation(self, tmpdir):
    # os.environ['RESULTCACHING_DISABLE'] = 'brainscore.score_model,model_tools'
    working_dir = str(tmpdir.mkdir('sub'))
    config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
    run_evaluation(config_dir, working_dir, 36, TestIntegration.databse,
                   models=['alexnet'],
                   benchmarks=['movshon.FreemanZiemba2013.V1-pls'])
    with open('result_36.csv') as results:
        csv_reader = csv.reader(results, delimiter=',')
        next(csv_reader)  # skip header row
        result_row = next(csv_reader)
        assert result_row[0] == 'alexnet'
        assert result_row[1] == 'movshon.FreemanZiemba2013.V1-pls'
        assert result_row[2] == '0'
        assert result_row[3] == '0'
    model = Model.get()
    score = Score.get(model=model)
    # on failure, the comment field holds the error message
    assert score.comment is not None
def test_evaluation(self, tmpdir):
    working_dir = str(tmpdir.mkdir('sub'))
    config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
    run_evaluation(config_dir, working_dir, 33, TestIntegration.databse,
                   models=['alexnet'],
                   benchmarks=['dicarlo.MajajHong2015.IT-pls'])
    with open('result_33.csv') as results:
        csv_reader = csv.reader(results, delimiter=',')
        next(csv_reader)  # skip header row
        result_row = next(csv_reader)
        assert result_row[0] == 'alexnet'
        assert result_row[1] == 'dicarlo.MajajHong2015.IT-pls'
        assert self.compare(float(result_row[2]), 0.5857491098187586)
        assert self.compare(float(result_row[3]), 0.5079816726934638)
        assert self.compare(float(result_row[4]), 0.003155449372125895)
    scores = Score.select()
    assert len(scores) == 1
    # a successful score's comment records which layers were mapped to which regions
    assert scores[0].comment.startswith("layers:")
def test_evaluation(self, tmpdir):
    working_dir = str(tmpdir.mkdir('sub'))
    config_dir = str(os.path.join(os.path.dirname(__file__), 'configs/'))
    run_evaluation(config_dir, working_dir, 33, TestIntegration.databse,
                   models=['alexnet'],
                   benchmarks=['dicarlo.MajajHong2015.IT-pls'])
    with open('result_33.csv') as results:
        csv_reader = csv.reader(results, delimiter=',')
        next(csv_reader)  # skip header row
        result_row = next(csv_reader)
        assert result_row[0] == 'alexnet'
        assert result_row[1] == 'dicarlo.MajajHong2015.IT-pls'
        assert self.compare(float(result_row[2]), 0.5857491098187586)
        assert self.compare(float(result_row[3]), 0.5079816726934638)
        assert self.compare(float(result_row[4]), 0.003155449372125895)
    scores = Score.select()
    assert len(scores) == 1
    # if the comment is None the score was stored successfully; otherwise it would contain an error message
    assert scores[0].comment is None
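
# The tests above call self.compare() to check floating-point scores, but its definition is not
# shown in this section. A plausible sketch is a tolerance-based comparison along these lines;
# the method name matches the usage above, while the tolerance value is an assumption.
def compare(self, actual, expected, absolute_tolerance=0.0001):
    # treat two scores as equal if they differ by less than the tolerance
    return abs(actual - expected) < absolute_tolerance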
def run_submission(module, test_models, test_benchmarks, submission_entry):
    ml_brain_pool = get_ml_pool(test_models, module, submission_entry)
    data = []
    success = True
    try:
        for model_entry in test_models:
            model_id = model_entry.name
            for benchmark_name in test_benchmarks:
                score_entry = None
                try:
                    start = datetime.datetime.now()
                    benchmark_entry = get_benchmark_instance(benchmark_name)
                    # Check if the model is already scored on the benchmark
                    score_entry, created = Score.get_or_create(benchmark=benchmark_entry, model=model_entry,
                                                               defaults={'start_timestamp': start, })
                    if not created and score_entry.score_raw is not None:
                        logger.warning(f'A score for model {model_id} and benchmark {benchmark_name} already exists')
                        raw = score_entry.score_raw
                        ceiled = score_entry.score_ceiled
                        error = score_entry.error
                        finished = score_entry.end_timestamp
                        comment = score_entry.comment
                    else:
                        if not created:
                            # an entry exists but was not evaluated successfully, so rerun it
                            score_entry.start_timestamp = datetime.datetime.now()
                            score_entry.comment = None
                            logger.warning('An entry already exists but was not evaluated successfully, rerunning')
                        logger.info(f"Scoring {model_id}, id {model_entry.id} on benchmark {benchmark_name}")
                        model = ml_brain_pool[model_id]
                        score = score_model(model_id, benchmark_name, model)
                        logger.info(f'Running benchmark {benchmark_name} on model {model_id} (id {model_entry.id}) '
                                    f'produced this score: {score}')
                        if not hasattr(score, 'ceiling'):
                            # many engineering benchmarks do not have a primate ceiling
                            raw = score.sel(aggregation='center').item(0)
                            ceiled = None
                            error = None
                        else:
                            # score has a ceiling: store the ceiled as well as the raw value
                            assert score.raw.sel(aggregation='center') is not None
                            raw = score.raw.sel(aggregation='center').item(0)
                            ceiled = score.sel(aggregation='center').item(0)
                            error = score.sel(aggregation='error').item(0)
                        finished = datetime.datetime.now()
                        comment = f"layers: {model.layer_model.region_layer_map}" \
                            if submission_entry.model_type == 'BaseModel' else ''
                        score_entry.end_timestamp = finished
                        score_entry.error = error
                        score_entry.score_ceiled = ceiled
                        score_entry.score_raw = raw
                        score_entry.comment = comment
                        score_entry.save()
                    result = {
                        'Model': model_id,
                        'Benchmark': benchmark_name,
                        'raw_result': raw,
                        'ceiled_result': ceiled,
                        'error': error,
                        'finished_time': finished,
                        'comment': comment,
                    }
                    data.append(result)
                except Exception as e:
                    success = False
                    error = f'Benchmark {benchmark_name} failed for model {model_id} because of this error: {e}'
                    logging.error(f'Could not run model {model_id} because of the following error')
                    logging.error(e, exc_info=True)
                    data.append({
                        'Model': model_id,
                        'Benchmark': benchmark_name,
                        'raw_result': 0,
                        'ceiled_result': 0,
                        'error': error,
                        'finished_time': datetime.datetime.now()
                    })
                    if score_entry:
                        # truncate overlong error messages to fit the comment column, keeping head and tail
                        score_entry.comment = error if len(error) <= SCORE_COMMENT_MAX_LENGTH else \
                            error[:int(SCORE_COMMENT_MAX_LENGTH / 2) - 5] + ' [...] ' + \
                            error[-int(SCORE_COMMENT_MAX_LENGTH / 2) + 5:]
                        score_entry.save()
    finally:
        if success:
            submission_entry.status = 'successful'
            logger.info('Submission is stored as successful')
        else:
            submission_entry.status = 'failure'
            logger.info('Submission was not entirely successful (some benchmarks could not be executed)')
        submission_entry.save()
    return data
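
# The error-handling branch above shortens overlong error messages so they fit the
# Score.comment column. Below is a standalone sketch of that truncation scheme for
# illustration only: it keeps the head and tail of the message around a ' [...] ' marker.
# The function name is hypothetical; `max_length` stands in for SCORE_COMMENT_MAX_LENGTH.
def truncate_comment(error, max_length):
    if len(error) <= max_length:
        return error
    half = int(max_length / 2)
    # (half - 5) leading chars + ' [...] ' (7 chars) + (half - 5) trailing chars <= max_length
    return error[:half - 5] + ' [...] ' + error[-(half - 5):]


# Example: with max_length=20, a 30-character message 'x' * 30 becomes
# 'xxxxx [...] xxxxx' (17 characters), which fits within the column limit.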