# Abstract strategy games: taxonomy entries, the chess and Go metrics attached
# to them, and the recorded Elo measurements for computer chess.
from taxonomy import Problem
from scales import *
import datetime

date = datetime.date

abstract_strategy_games = Problem(
    "Abstract strategy games",
    ["agi", "abstract-games"],
)

playing_with_hints = Problem(
    "Playing abstract games with extensive hints",
    ["abstract-games"],
    solved=True,
)
abstract_strategy_games.add_subproblem(playing_with_hints)
playing_with_hints.notes = """ Complex abstract strategy games have been solved to super-human levels by computer systems with extensive rule-hinting and heuristics, in some cases combined with machine learning techniques. """

computer_chess = playing_with_hints.metric(
    "Computer Chess",
    scale=elo,
    target=2882,
    target_label="Best human play",
    target_source="https://en.wikipedia.org/w/index.php?title=Comparison_of_top_chess_players_throughout_history&oldid=777500496#Elo_system",
)
computer_go = playing_with_hints.metric(
    "Computer Go",
    scale=elo,
    target=3632,
    target_label="Best human play",
    target_source="https://www.goratings.org/en/history/",
)
# No Go measurements are recorded below, so the metric is flagged as solved
# by hand until proper data is available.
computer_go.solved = True

# Caveats about engine ratings:
# https://en.wikipedia.org/w/index.php?title=Chess_engine&oldid=764341963#Ratings
# Possible follow-up: script ingestion of data from CCRL, or get data from Katja.
#
# Rows are (date, Elo, engine, uncertainty, source URL), kept in the order in
# which they were originally recorded.
_rated_with_uncertainty = (
    (date(1997, 5, 11), 2725, "Deep Blue", 25,
     "https://www.quora.com/What-was-Deep-Blues-Elo-rating"),
    (date(2006, 5, 27), 2995, "Rybka 1.1 64bit", 25,
     "https://web.archive.org/web/20060531091049/http://www.computerchess.org.uk/ccrl/4040/rating_list_all.html"),
    (date(2010, 8, 7), 3269, "Rybka 4 64bit", 22,
     "https://web.archive.org/web/20100923131123/http://www.computerchess.org.uk/ccrl/4040/rating_list_all.html"),
    # NOTE(review): unlike the other archive links, this URL has a `*` after
    # the timestamp -- confirm it resolves to the intended snapshot.
    (date(2013, 7, 20), 3248, "Houdini 3 64bit", 16,
     "https://web.archive.org/web/20130415000000*/http://www.computerchess.org.uk/ccrl/4040/rating_list_all.html"),
    (date(2015, 7, 4), 3332, "Komodo 9", 24,
     "https://web.archive.org/web/20150708104805/http://www.computerchess.org.uk/ccrl/4040/rating_list_all.html"),
    (date(2017, 2, 27), 3393, "Stockfish", 50,
     "https://web.archive.org/web/20170227044521/http://www.computerchess.org.uk/ccrl/4040/"),
)
for _when, _elo_score, _engine, _spread, _source in _rated_with_uncertainty:
    computer_chess.measure(_when, _elo_score, _engine, uncertainty=_spread, url=_source)

# Older entries, all citing the same Wikipedia table of year-end rating-list
# leaders; each is dated 31 December and carries no uncertainty value.
_year_end_leaders_url = "https://en.wikipedia.org/wiki/Swedish_Chess_Computer_Association#Rating_list_year-end_leaders"
_year_end_leaders = (
    (1984, 1631, "Novag Super Constellation 6502 4 MHz"),
    (1985, 1827, "Mephisto Amsterdam 68000 12 MHz"),
    (1986, 1827, "Mephisto Amsterdam 68000 12 MHz"),
    (1987, 1923, "Mephisto Dallas 68020 14 MHz"),
)
for _year, _elo_score, _engine in _year_end_leaders:
    computer_chess.measure(date(_year, 12, 31), _elo_score, _engine, url=_year_end_leaders_url)
from scales import * import datetime date = datetime.date read_stem_papers = Problem( "Read a scientific or technical paper, and comprehend its contents", ["language", "world-modelling", "super"]) # Getting some major results from an abstract, tables or conclusion is much easier than understanding the entire paper, its assumptions, robustness, support for its claims, etc extract_results = Problem( "Extract major numerical results or progress claims from a STEM paper", ["language", "world-modelling", "agi"]) read_stem_papers.add_subproblem(extract_results) extract_results.metric("Automatically find new relevant ML results on arXiv") extract_results.notes = """ This metric is the ability to automatically update the ipython Notebook you are reading by spotting results in pdfs uploaded to arxiv.org. Pull requests demonstrating solutions are welcome :) """ solve_technical_problems = Problem( "Given an arbitrary technical problem, solve it as well as a typical professional in that field", ["language", "world-modelling"]) program_induction = Problem("Writing software from specifications") solve_technical_problems.add_subproblem(program_induction) program_induction.metric("Card2Code", url="https://github.com/deepmind/card2code", scale=correct_percent) vaguely_constrained_technical_problems = Problem(
# Taxonomy entries for reading scientific/technical papers and for solving
# technical problems, with the metrics currently attached to them.
from taxonomy import Problem
from scales import *
import datetime

date = datetime.date

read_stem_papers = Problem(
    "Read a scientific or technical paper, and comprehend its contents",
    ["language", "world-modelling", "super"],
)

# Pulling the headline results out of an abstract, tables or conclusion is
# much easier than understanding the whole paper: its assumptions, its
# robustness, the support for its claims, and so on.
extract_results = Problem(
    "Extract major numerical results or progress claims from a STEM paper",
    ["language", "world-modelling", "agi"],
)
read_stem_papers.add_subproblem(extract_results)
extract_results.metric("Automatically find new relevant ML results on arXiv")
extract_results.notes = """ This metric is the ability to automatically update the ipython Notebook you are reading by spotting results in pdfs uploaded to arxiv.org. Pull requests demonstrating solutions are welcome :) """

solve_technical_problems = Problem(
    "Given an arbitrary technical problem, solve it as well as a typical professional in that field",
    ["language", "world-modelling"],
)

program_induction = Problem("Writing software from specifications")
solve_technical_problems.add_subproblem(program_induction)
program_induction.metric(
    "Card2Code",
    url="https://github.com/deepmind/card2code",
    scale=correct_percent,
)

vaguely_constrained_technical_problems = Problem(
    "Solve vaguely or under-constrained technical problems"
)
solve_technical_problems.add_subproblem(vaguely_constrained_technical_problems)

# The subset below is much easier: it assumes a human / worldly problem has
# already been reduced to something that can be subjected to clear
# computational evaluation, e.g.
#   "is this purported proof of theorem X correct?",
#   "does this circuit perform task Y efficiently?",
#   "will this airframe fly with reasonable characteristics?"
solve_constrained_technical_problems = Problem(
    "Solve technical problems with clear constraints (proofs, circuit design, aerofoil design, etc)"
)
solve_technical_problems.add_subproblem(solve_constrained_technical_problems)