def test_learners(
    description,
    group,
    max_tests,
    needed_wins,
    row_limits,
    col_limits,
    seed,
    grader,
):
    """Test that the data-generation methods produce data favoring the intended learner.

    Requires test description, test case group, and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    incorrect = True
    msgs = []
    try:
        data_x, data_y = None, None
        same_data_x, same_data_y = None, None
        diff_data_x, diff_data_y = None, None
        better_learner, worse_learner = None, None
        if group == "author":
            try:
                from gen_data import author

                auth_string = run_with_timeout(
                    author, seconds_per_test_case, (), {}
                )
                if auth_string == "tb34":
                    incorrect = True
                    msgs.append("  Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == "":
                    incorrect = True
                    msgs.append("  Empty author name")
                    points_earned = -10
                else:
                    incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append(
                    "  Exception occurred when calling author() method: {}".format(e)
                )
                points_earned = -10
        else:
            if group == "best4dt":
                from gen_data import best_4_dt

                data_x, data_y = run_with_timeout(
                    best_4_dt, seconds_per_test_case, (), {"seed": seed}
                )
                same_data_x, same_data_y = run_with_timeout(
                    best_4_dt, seconds_per_test_case, (), {"seed": seed}
                )
                diff_data_x, diff_data_y = run_with_timeout(
                    best_4_dt, seconds_per_test_case, (), {"seed": seed + 1}
                )
                better_learner = DTLearner
                worse_learner = LinRegLearner
            elif group == "best4lr":
                from gen_data import best_4_lin_reg

                data_x, data_y = run_with_timeout(
                    best_4_lin_reg, seconds_per_test_case, (), {"seed": seed}
                )
                same_data_x, same_data_y = run_with_timeout(
                    best_4_lin_reg, seconds_per_test_case, (), {"seed": seed}
                )
                diff_data_x, diff_data_y = run_with_timeout(
                    best_4_lin_reg, seconds_per_test_case, (), {"seed": seed + 1}
                )
                better_learner = LinRegLearner
                worse_learner = DTLearner

            # Train/test split on a fresh permutation each run; collect paired errors
            num_samples = data_x.shape[0]
            cutoff = int(num_samples * 0.6)
            worse_better_err = []
            for run in range(max_tests):
                permutation = np.random.permutation(num_samples)
                train_x, train_y = (
                    data_x[permutation[:cutoff]],
                    data_y[permutation[:cutoff]],
                )
                test_x, test_y = (
                    data_x[permutation[cutoff:]],
                    data_y[permutation[cutoff:]],
                )
                better = better_learner()
                worse = worse_learner()
                better.add_evidence(train_x, train_y)
                worse.add_evidence(train_x, train_y)
                better_pred = better.query(test_x)
                worse_pred = worse.query(test_x)
                better_err = np.linalg.norm(test_y - better_pred)
                worse_err = np.linalg.norm(test_y - worse_pred)
                worse_better_err.append((worse_err, better_err))

            # Sort runs by the margin (worse_err - better_err), largest first;
            # np.sign keeps sub-unit float differences from truncating to "equal"
            worse_better_err.sort(
                key=functools.cmp_to_key(
                    lambda a, b: int(np.sign((b[0] - b[1]) - (a[0] - a[1])))
                )
            )
            better_wins_count = 0
            for worse_err, better_err in worse_better_err:
                if better_err < 0.9 * worse_err:
                    better_wins_count = better_wins_count + 1
                    points_earned += 5.0
                if better_wins_count >= needed_wins:
                    break

            incorrect = False
            if (data_x.shape[0] < row_limits[0]) or (data_x.shape[0] > row_limits[1]):
                incorrect = True
                msgs.append(
                    "  Invalid number of rows. Should be between {},"
                    " found {}".format(row_limits, data_x.shape[0])
                )
                points_earned = max(0, points_earned - 20)
            if (data_x.shape[1] < col_limits[0]) or (data_x.shape[1] > col_limits[1]):
                incorrect = True
                msgs.append(
                    "  Invalid number of columns. Should be between {},"
                    " found {}".format(col_limits, data_x.shape[1])
                )
                points_earned = max(0, points_earned - 20)
            if better_wins_count < needed_wins:
                incorrect = True
                msgs.append(
                    "  Better learner did not exceed worse learner. Expected"
                    " {}, found {}".format(needed_wins, better_wins_count)
                )
            if not (np.array_equal(same_data_y, data_y)) or not (
                np.array_equal(same_data_x, data_x)
            ):
                incorrect = True
                msgs.append(
                    "  Did not produce the same data with the same seed.\n"
                    + "  First data_x:\n{}\n".format(data_x)
                    + "  Second data_x:\n{}\n".format(same_data_x)
                    + "  First data_y:\n{}\n".format(data_y)
                    + "  Second data_y:\n{}\n".format(same_data_y)
                )
                points_earned = max(0, points_earned - 20)
            if np.array_equal(diff_data_y, data_y) and np.array_equal(
                diff_data_x, data_x
            ):
                incorrect = True
                msgs.append(
                    "  Did not produce different data with different seeds.\n"
                    + "  First data_x:\n{}\n".format(data_x)
                    + "  Second data_x:\n{}\n".format(diff_data_x)
                    + "  First data_y:\n{}\n".format(data_y)
                    + "  Second data_y:\n{}\n".format(diff_data_y)
                )
                points_earned = max(0, points_earned - 20)
        if incorrect:
            if group == "author":
                raise IncorrectOutput(
                    "Test failed on one or more criteria.\n  {}".format(
                        "\n".join(msgs)
                    )
                )
            else:
                inputs_str = "  Residuals: {}".format(worse_better_err)
                raise IncorrectOutput(
                    "Test failed on one or more output criteria.\n"
                    "  Inputs:\n{}\n  Failures:\n{}".format(
                        inputs_str, "\n".join(msgs)
                    )
                )
        else:
            if group != "author":
                # Report the average (worse - better) error margin over the top 10 runs
                avg_ratio = 0.0
                worse_better_err.sort(
                    key=functools.cmp_to_key(
                        lambda a, b: int(np.sign((b[0] - b[1]) - (a[0] - a[1])))
                    )
                )
                for we, be in worse_better_err[:10]:
                    avg_ratio += float(we) - float(be)
                avg_ratio = avg_ratio / 10.0
                if group == "best4dt":
                    grader.add_performance(np.array([avg_ratio, 0]))
                else:
                    grader.add_performance(np.array([0, avg_ratio]))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (
                os.path.basename(row[0]),
                row[1],
                row[2],
                row[3],
            )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == "gen_data.py")]
        if tb_list:
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(tb_list))  # contains newlines
        elif "grading_traceback" in dir(e):
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome="failed", points=points_earned, msg=msg)
        )
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome="passed", points=points_earned, msg=None)
        )
def test_qlearning(description, group, world_file, best_reward, median_reward,
                   max_time, points, grader):
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'QLearner' not in globals():
            import importlib
            m = importlib.import_module('QLearner')
            globals()['QLearner'] = m
        # Unpack test case
        world = np.array([list(map(float, s.strip().split(',')))
                          for s in util.get_robot_world_file(world_file).readlines()])
        student_reward = None
        student_author = None
        msgs = []
        if group == 'nodyna':
            def timeoutwrapper_nodyna():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,
                                            num_actions=4,
                                            alpha=0.2,
                                            gamma=0.9,
                                            rar=0.98,
                                            radr=0.999,
                                            dyna=0,
                                            verbose=False)
                return qltest(worldmap=world, iterations=500, max_steps=10000,
                              learner=learner, verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_nodyna, max_time, (), {})
            incorrect = False
            if student_reward < 1.5 * median_reward:
                incorrect = True
                msgs.append("  Reward too low, expected %s, found %s" % (median_reward, student_reward))
        elif group == 'dyna':
            def timeoutwrapper_dyna():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,
                                            num_actions=4,
                                            alpha=0.2,
                                            gamma=0.9,
                                            rar=0.5,
                                            radr=0.99,
                                            dyna=200,
                                            verbose=False)
                return qltest(worldmap=world, iterations=50, max_steps=10000,
                              learner=learner, verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_dyna, max_time, (), {})
            incorrect = False
            if student_reward < 1.5 * median_reward:
                incorrect = True
                msgs.append("  Reward too low, expected %s, found %s" % (median_reward, student_reward))
        elif group == 'author':
            points_earned = -20

            def timeoutwrapper_author():
                # Note: the following will NOT be commented during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,
                                            num_actions=4,
                                            alpha=0.2,
                                            gamma=0.9,
                                            rar=0.98,
                                            radr=0.999,
                                            dyna=0,
                                            verbose=False)
                return learner.author()
            student_author = run_with_timeout(timeoutwrapper_author, max_time, (), {})
            student_reward = best_reward + 1
            incorrect = False
            if (student_author is None) or (student_author == 'tb34'):
                incorrect = True
                msgs.append("  author() method not implemented correctly."
                            " Found {}".format(student_author))
            else:
                points_earned = points
        if not incorrect:
            points_earned += points
        if incorrect:
            inputs_str = "    group: {}\n" \
                         "    world_file: {}\n" \
                         "    median_reward: {}\n".format(group, world_file, median_reward)
            raise IncorrectOutput("Test failed on one or more output criteria.\n"
                                  "  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] in ['QLearner.py', 'StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
def test_optimization(inputs, outputs, description, grader):
    """Test that find_optimal_allocations() returns correct allocations.

    Requires test inputs, expected outputs, description, and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            # * Import methods to test (refactored out, spring 2016, --BPH)
            # for m in ['find_optimal_allocations']:
            #     globals()[m] = getattr(mod, m)

        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        # Read in adjusted closing prices for given symbols, date range
        # dates = pd.date_range(start_date, end_date)
        # prices_all = get_data(symbols, dates)  # automatically adds SPY
        # prices = prices_all[symbols]  # only portfolio symbols

        # Run student code with time limit (in seconds, per test case)
        port_stats = {}
        with time_limit(seconds_per_test_case):
            # * Find optimal allocations
            student_allocs, student_cr, student_adr, student_sddr, student_sr = \
                optimization.optimize_portfolio(sd=start_date, ed=end_date,
                                                syms=symbols, gen_plot=False)
            student_allocs = np.float32(student_allocs)  # make sure it's a NumPy array, for easier computation

        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']

        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append("    sum of allocations: {} (expected: 1.0)".format(sum_allocs))
            student_allocs = student_allocs / sum_allocs  # normalize allocations, if they don't sum to 1.0
        else:
            points_earned += points_per_component['sum_to_one']

        # * Get daily portfolio value and statistics, for comparison
        # port_val = get_portfolio_value(prices, allocs, start_val)
        # cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

        # * Check alloc_range: Each allocation is within [0.0, 1.0] +/- margin
        # * Check alloc_match: Each allocation matches expected value +/- margin
        points_per_alloc_range = points_per_component['alloc_range'] / len(correct_allocs)
        points_per_alloc_match = points_per_component['alloc_match'] / len(correct_allocs)
        for symbol, alloc, correct_alloc in zip(symbols, student_allocs, correct_allocs):
            if alloc < -abs_margins['alloc_range'] or alloc > (1.0 + abs_margins['alloc_range']):
                incorrect = True
                msgs.append("    {} - allocation out of range: {} (expected: [0.0, 1.0])".format(symbol, alloc))
            else:
                points_earned += points_per_alloc_range
            if abs(alloc - correct_alloc) > abs_margins['alloc_match']:
                incorrect = True
                msgs.append("    {} - incorrect allocation: {} (expected: {})".format(symbol, alloc, correct_alloc))
            else:
                points_earned += points_per_alloc_match
        # points_earned = round(points_earned)  # round off points earned to nearest integer (?)

        if incorrect:
            inputs_str = "    start_date: {}\n" \
                         "    end_date: {}\n" \
                         "    symbols: {}\n".format(start_date, end_date, symbols)
            # If there are problems with the stats and all of the values returned
            # match the template code exactly, then award 0 points
            # if check_template(student_allocs, student_cr, student_adr, student_sddr, student_sr):
            #     points_earned = 0
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}"
                .format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'optimization.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
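# Sketch of the optimize_portfolio() contract the test above relies on: the
# keyword names (sd, ed, syms, gen_plot) and the five return values come from
# the call and unpacking above; the equal-weight body is an illustrative
# placeholder, not a real optimizer.
def _example_optimize_portfolio(sd=None, ed=None, syms=None, gen_plot=False):
    allocs = np.ones(len(syms)) / len(syms)  # each in [0, 1], summing to 1.0
    cr, adr, sddr, sr = 0.0, 0.0, 0.0, 0.0  # cum. return, avg/std daily return, Sharpe
    return allocs, cr, adr, sddr, sr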
def test_strategy(description, insample_args, outsample_args, benchmark_type,
                  benchmark, impact, train_time, test_time, max_time, seed, grader):
    """Test StrategyLearner.

    Requires test description, insample args (dict), outsample args (dict),
    benchmark_type (str), benchmark (float), impact (float), train/test time
    limits (seconds), max time (seconds), random seed (long), and a grader
    fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'StrategyLearner' not in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark

        def timeoutwrapper_strategylearner():
            # Set fixed seed for repeatability
            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False, impact=impact)
            tmp = time.time()
            learner.addEvidence(**insample_args)
            train_t = time.time() - tmp
            tmp = time.time()
            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time() - tmp
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time() - tmp
            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t

        msgs = []
        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = \
            run_with_timeout(timeoutwrapper_strategylearner, max_time, (), {})
        incorrect = False
        if len(in_trades_1.shape) != 2 or in_trades_1.shape[1] != 1:
            incorrect = True
            msgs.append("  First insample trades DF has invalid shape: {}".format(in_trades_1.shape))
        elif len(in_trades_2.shape) != 2 or in_trades_2.shape[1] != 1:
            incorrect = True
            msgs.append("  Second insample trades DF has invalid shape: {}".format(in_trades_2.shape))
        elif len(out_trades.shape) != 2 or out_trades.shape[1] != 1:
            incorrect = True
            msgs.append("  Out-of-sample trades DF has invalid shape: {}".format(out_trades.shape))
        else:
            tmp_csum = 0.0
            for date, trade in in_trades_1.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True
                    msgs.append("  illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n"
                                "  Date {}, Trade {}".format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append("  holdings more than {} long or short in first insample DF."
                                " Date {}, Trade {}".format(MAX_HOLDINGS, date, trade))
                    break
            tmp_csum = 0.0
            for date, trade in in_trades_2.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True
                    msgs.append("  illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n"
                                "  Date {}, Trade {}".format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append("  holdings more than {} long or short in second insample DF."
                                " Date {}, Trade {}".format(MAX_HOLDINGS, date, trade))
                    break
            tmp_csum = 0.0
            for date, trade in out_trades.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True
                    msgs.append("  illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n"
                                "  Date {}, Trade {}".format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date, trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append("  holdings more than {} long or short in out-of-sample DF."
                                " Date {}, Trade {}".format(MAX_HOLDINGS, date, trade))
                    break
        # Vectorized versions of the trade/holdings checks, kept for reference:
        # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or
        #     ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or
        #     ((out_trades.abs()!=0) & (out_trades.abs()!=MAX_HOLDINGS) & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
        #     incorrect = True
        #     msgs.append("  illegal trade. abs(trades) not one of ({},{},{})".format(0, MAX_HOLDINGS, 2*MAX_HOLDINGS))
        # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
        #     incorrect = True
        #     msgs.append("  holdings more than {} long or short".format(MAX_HOLDINGS))
        if not incorrect:
            if train_t > train_time:
                incorrect = True
                msgs.append("  addEvidence() took {} seconds, max allowed {}".format(train_t, train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append("  testPolicy() took {} seconds, max allowed {}".format(test_t, test_time))
            else:
                points_earned += 2.0
            if not (in_trades_1 == in_trades_2).all().iloc[0]:
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2, how='outer', lsuffix='1', rsuffix='2')
                mismatches = mismatches[mismatches.iloc[:, 0] != mismatches.iloc[:, 1]]
                msgs.append("  consecutive calls to testPolicy() with same input did not produce same output:")
                msgs.append("  Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'], in_trades_1, insample_args['sv'],
                                              insample_args['sd'], insample_args['ed'],
                                              market_impact=impact, commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'], out_trades, outsample_args['sv'],
                                               outsample_args['sd'], outsample_args['ed'],
                                               market_impact=impact, commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append("  in-sample return ({}) did not beat benchmark ({})".format(student_insample_cr, benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append("  out-of-sample testPolicy() took {} seconds, max allowed {}".format(out_test_t, test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append("  out-of-sample return ({}) did not beat benchmark ({})".format(student_outsample_cr, outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = "    insample_args: {}\n" \
                         "    outsample_args: {}\n" \
                         "    benchmark_type: {}\n" \
                         "    benchmark: {}\n" \
                         "    train_time: {}\n" \
                         "    test_time: {}\n" \
                         "    max_time: {}\n" \
                         "    seed: {}\n".format(insample_args, outsample_args, benchmark_type,
                                                 benchmark, train_time, test_time, max_time, seed)
            raise IncorrectOutput("Test failed on one or more output criteria.\n"
                                  "  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py', 'StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
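# The three loops above enforce the trades-DataFrame contract: a single
# column, every trade in {0, +/-MAX_HOLDINGS, +/-2*MAX_HOLDINGS}, and a
# running position (cumulative sum) bounded by +/-MAX_HOLDINGS. A tiny legal
# example (dates and column name are illustrative):
def _example_legal_trades():
    dates = pd.date_range('2008-01-02', periods=4)
    trades = [MAX_HOLDINGS, 0.0, -2 * MAX_HOLDINGS, MAX_HOLDINGS]  # long, hold, reverse, flat
    return pd.DataFrame(trades, index=dates, columns=['Trades'])  # cumsum stays within +/-MAX_HOLDINGS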
def test_marketsim(description, group, inputs, outputs, grader):
    """Test compute_portvals() returns correct daily portfolio values.

    Requires test description, test case group, inputs, expected outputs,
    and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            # * Import methods to test
            for m in ['compute_portvals']:
                globals()[m] = getattr(mod, m)

        incorrect = False
        msgs = []
        if group == 'author':
            try:
                auth_string = run_with_timeout(marketsim.author, seconds_per_test_case, (), {})
                if auth_string == 'tb34':
                    incorrect = True
                    msgs.append("  Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == '':
                    incorrect = True
                    msgs.append("  Empty author name")
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append("  Exception occurred when calling author() method: {}".format(e))
                points_earned = -10
        else:
            # Unpack test case
            orders_file = inputs['orders_file']
            start_val = inputs['start_val']
            impct = inputs['impact']
            commish = inputs['commission']
            portvals = None
            fullpath_orders_file = get_orders_data_file(orders_file)
            portvals = run_with_timeout(compute_portvals, seconds_per_test_case, (),
                                        {'orders_file': fullpath_orders_file,
                                         'start_val': start_val,
                                         'commission': commish,
                                         'impact': impct})
            # * Check return type is correct, coax into Series
            assert (type(portvals) == pd.Series) or \
                   (type(portvals) == pd.DataFrame and len(portvals.columns) == 1), \
                "You must return a Series or single-column DataFrame!"
            if type(portvals) == pd.DataFrame:
                portvals = portvals[portvals.columns[0]]  # convert single-column DataFrame to Series
            if portvals.isnull().values.any():
                incorrect = True
                msgs.append("Portfolio values cannot be NaNs!")
            else:
                if group == 'basic':
                    if len(portvals) != outputs['num_days']:
                        incorrect = True
                        msgs.append("  Incorrect number of days: {}, expected {}".format(len(portvals), outputs['num_days']))
                    else:
                        points_earned += 2.0
                    if abs(portvals.iloc[-1] - outputs['last_day_portval']) > (0.001 * outputs['last_day_portval']):
                        incorrect = True
                        msgs.append("  Incorrect final value: {}, expected {}".format(portvals.iloc[-1], outputs['last_day_portval']))
                    else:
                        points_earned += 5.0
                    adr, sr = get_stats(portvals)
                    if abs(sr - outputs['sharpe_ratio']) > abs(0.001 * outputs['sharpe_ratio']):
                        incorrect = True
                        msgs.append("  Incorrect sharpe ratio: {}, expected {}".format(sr, outputs['sharpe_ratio']))
                    else:
                        points_earned += 1.0
                    if abs(adr - outputs['avg_daily_ret']) > abs(0.001 * outputs['avg_daily_ret']):
                        incorrect = True
                        msgs.append("  Incorrect avg daily return: {}, expected {}".format(adr, outputs['avg_daily_ret']))
                    else:
                        points_earned += 1.0
                elif group in ('commission', 'impact', 'both'):
                    if abs(portvals.iloc[-1] - outputs['last_day_portval']) > 0.001:  # (0.001*outputs['last_day_portval'])
                        incorrect = True
                        msgs.append("  Incorrect final value: {}, expected {}".format(portvals.iloc[-1], outputs['last_day_portval']))
                    else:
                        points_earned += 2.0
        if incorrect:
            # inputs_str = "    orders_file: {}\n" \
            #              "    start_val: {}\n".format(orders_file, start_val)
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}"
                .format(inputs, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        if 'grading_traceback' in dir(e):
            tb_list = e.grading_traceback
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'marketsim.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=max(points_earned, 0), msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
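# Sketch of the compute_portvals() contract checked above: it must accept the
# keyword arguments used in the run_with_timeout() call and return a
# date-indexed pd.Series (or single-column DataFrame) of daily portfolio
# values with no NaNs. The body and default values here are illustrative
# placeholders, not a real market simulator.
def _example_compute_portvals(orders_file=None, start_val=1000000.0,
                              commission=0.0, impact=0.0):
    dates = pd.date_range('2008-01-02', periods=3)  # illustrative dates
    return pd.Series([start_val, start_val * 1.01, start_val * 0.99], index=dates)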
def test_analysis(inputs, outputs, description, grader):
    """Test that assess_portfolio() returns correct portfolio statistics.

    Requires test inputs, expected outputs, description, and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib

            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod

        # Unpack test case
        start_date_str = inputs["start_date"].split("-")
        start_date = datetime.datetime(
            int(start_date_str[0]),
            int(start_date_str[1]),
            int(start_date_str[2]),
        )
        end_date_str = inputs["end_date"].split("-")
        end_date = datetime.datetime(
            int(end_date_str[0]), int(end_date_str[1]), int(end_date_str[2])
        )
        symbols = list(inputs["symbol_allocs"].keys())  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']
        allocs = list(inputs["symbol_allocs"].values())  # e.g.: [0.2, 0.3, 0.4, 0.1]
        start_val = inputs["start_val"]
        risk_free_rate = inputs.get("risk_free_rate", 0.0)

        def timeoutwrapper_analysis():
            student_rv = assess_portfolio(
                sd=start_date,
                ed=end_date,
                syms=symbols,
                allocs=allocs,
                sv=start_val,
                rfr=risk_free_rate,
                sf=252.0,
                gen_plot=False,
            )
            return student_rv

        result = run_with_timeout(timeoutwrapper_analysis, max_seconds_per_call, (), {})
        # The wonky unpacking here is so that we only pull out the values we
        # say we'll test (index 2, std daily return, is deliberately skipped)
        student_cr = result[0]
        student_adr = result[1]
        student_sr = result[3]
        port_stats = OrderedDict([
            ("cum_ret", student_cr),
            ("avg_daily_ret", student_adr),
            ("sharpe_ratio", student_sr),
        ])
        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        for key, value in port_stats.items():
            if abs(value - outputs[key]) > abs_margins[key]:
                incorrect = True
                msgs.append("    {}: {} (expected: {})".format(key, value, outputs[key]))
            else:
                points_earned += points_per_output[key]  # partial credit
        if incorrect:
            inputs_str = (
                "    start_date: {}\n"
                "    end_date: {}\n"
                "    symbols: {}\n"
                "    allocs: {}\n"
                "    start_val: {}".format(start_date, end_date, symbols, allocs, start_val)
            )
            raise IncorrectOutput(
                "One or more stats were incorrect.\n  Inputs:\n{}\n  Wrong"
                " values:\n{}".format(inputs_str, "\n".join(msgs))
            )
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (
                os.path.basename(row[0]),
                row[1],
                row[2],
                row[3],
            )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == "analysis.py"]
        if tb_list:
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome="failed", points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome="passed", points=points_earned, msg=None))
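# Sketch of the assess_portfolio() contract relied on above: it must accept
# the keyword arguments shown and return at least four statistics, of which
# positions 0 (cum_ret), 1 (avg_daily_ret), and 3 (sharpe_ratio) are compared
# against abs_margins. The returned numbers here are illustrative placeholders.
def _example_assess_portfolio(sd=None, ed=None, syms=None, allocs=None,
                              sv=1000000, rfr=0.0, sf=252.0, gen_plot=False):
    cr, adr, sddr, sr = 0.10, 0.0005, 0.01, 0.75  # placeholder statistics
    return cr, adr, sddr, sr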
def test_learners(description, group, datafile, seed, outputs, grader):
    """Test that ML models return correct predictions.

    Requires test description, test case group, inputs, expected outputs,
    and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        learner_class = None
        kwargs = {'verbose': False}
        # (BPH) Copied from grade_strategy_qlearning.py
        # Set fixed seed for repeatability
        np.random.seed(seed)
        random.seed(seed)
        # Remove ability to seed either np.random or python random
        tmp_numpy_seed = np.random.seed
        tmp_random_seed = random.seed
        np.random.seed = fake_seed
        random.seed = fake_rseed
        # Try to import KNNLearner (only once)
        # if 'KNNLearner' not in globals():
        #     from KNNLearner import KNNLearner
        if 'RTLearner' not in globals():
            from RTLearner import RTLearner
        if 'DTLearner' not in globals():
            from DTLearner import DTLearner
        if (group in ('BagLearner', 'InsaneLearner', 'RandomName')) and \
           ('BagLearner' not in globals()):
            from BagLearner import BagLearner
        # Put seeds back for the moment
        np.random.seed = tmp_numpy_seed
        random.seed = tmp_random_seed
        # Tweak kwargs
        # kwargs.update(inputs.get('kwargs', {}))
        # Read separate training and testing data files
        # with open(inputs['train_file']) as f:
        # data_partitions = list()
        testX, testY, trainX, trainY = None, None, None, None
        permutation = None
        author = None
        with util.get_learner_data_file(datafile) as f:
            alldata = np.genfromtxt(f, delimiter=',')
            # Skip the date column and header row if we're working on Istanbul data
            if datafile == 'Istanbul.csv':
                alldata = alldata[1:, 1:]
            datasize = alldata.shape[0]
            cutoff = int(datasize * 0.6)
            permutation = np.random.permutation(alldata.shape[0])
            col_permutation = np.random.permutation(alldata.shape[1] - 1)
            train_data = alldata[permutation[:cutoff], :]
            # trainX = train_data[:,:-1]
            trainX = train_data[:, col_permutation]
            trainY = train_data[:, -1]
            test_data = alldata[permutation[cutoff:], :]
            # testX = test_data[:,:-1]
            testX = test_data[:, col_permutation]
            testY = test_data[:, -1]
        msgs = []
        if group in ('RTLearner', 'DTLearner'):
            clss_name = RTLearner if group == 'RTLearner' else DTLearner
            tree_sptc = 3 if group == 'RTLearner' else 10
            corr_in, corr_out, corr_in_50 = None, None, None

            def oneleaf():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=1, verbose=False)
                learner.addEvidence(trainX, trainY)
                insample = learner.query(trainX)
                outsample = learner.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                author_rv = None
                try:
                    author_rv = learner.author()
                except:
                    pass
                return insample, outsample, author_rv

            def fiftyleaves():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=50, verbose=False)
                learner.addEvidence(trainX, trainY)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return learner.query(trainX)

            predY_in, predY_out, author = run_with_timeout(oneleaf, tree_sptc, (), {})
            predY_in_50 = run_with_timeout(fiftyleaves, tree_sptc, (), {})
            corr_in = np.corrcoef(predY_in, y=trainY)[0, 1]
            corr_out = np.corrcoef(predY_out, y=testY)[0, 1]
            corr_in_50 = np.corrcoef(predY_in_50, y=trainY)[0, 1]
            incorrect = False
            if corr_in < outputs['insample_corr_min'] or np.isnan(corr_in):
                incorrect = True
                msgs.append("  In-sample with leaf_size=1 correlation less than allowed: got {} expected {}".format(corr_in, outputs['insample_corr_min']))
            else:
                points_earned += 1.0
            if corr_out < outputs['outsample_corr_min'] or np.isnan(corr_out):
                incorrect = True
                msgs.append("  Out-of-sample correlation less than allowed: got {} expected {}".format(corr_out, outputs['outsample_corr_min']))
            else:
                points_earned += 1.0
            if corr_in_50 > outputs['insample_corr_max'] or np.isnan(corr_in_50):
                incorrect = True
                msgs.append("  In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}".format(corr_in_50, outputs['insample_corr_max']))
            else:
                points_earned += 1.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("  Invalid author: {}".format(author))
                points_earned += -2.0
        elif group == 'BagLearner':
            corr1, corr20 = None, None
            bag_sptc = 10

            def onebag():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner1 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=1, boost=False, verbose=False)
                learner1.addEvidence(trainX, trainY)
                q_rv = learner1.query(testX)
                a_rv = learner1.author()
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv, a_rv

            def twentybags():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner20 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1}, bags=20, boost=False, verbose=False)
                learner20.addEvidence(trainX, trainY)
                q_rv = learner20.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv

            predY1, author = run_with_timeout(onebag, bag_sptc, pos_args=(), keyword_args={})
            predY20 = run_with_timeout(twentybags, bag_sptc, (), {})
            corr1 = np.corrcoef(predY1, testY)[0, 1]
            corr20 = np.corrcoef(predY20, testY)[0, 1]
            incorrect = False
            # msgs = []
            if corr20 <= corr1:
                incorrect = True
                msgs.append("  Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}".format(corr20, corr1))
            else:
                points_earned += 2.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("  Invalid author: {}".format(author))
                points_earned += -1.0
        elif group == 'InsaneLearner':
            try:
                def insane():
                    import InsaneLearner as it
                    learner = it.InsaneLearner(verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)
                run_with_timeout(insane, 10, pos_args=(), keyword_args={})
                incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append("  Exception calling InsaneLearner: {}".format(e))
                points_earned = -10
        elif group == 'RandomName':
            try:
                il_name, il_code = gen_class()
                exec(il_code, globals())  # define the randomly named learner class in globals
                il_cobj = eval(il_name)

                def rnd_name():
                    np.random.seed(seed)
                    random.seed(seed)
                    np.random.seed = fake_seed
                    random.seed = fake_rseed
                    learner = BagLearner(learner=il_cobj, kwargs={'verbose': False}, bags=20, boost=False, verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)
                    np.random.seed = tmp_numpy_seed
                    random.seed = tmp_random_seed
                    return il_cobj.init_callcount_dict, il_cobj.add_callcount_dict, il_cobj.query_callcount_dict

                iccd, accd, qccd = run_with_timeout(rnd_name, 10, pos_args=(), keyword_args={})
                incorrect = False
                if (len(iccd) != 20) or (any([v != 1 for v in iccd.values()])):
                    incorrect = True
                    msgs.append("  Unexpected number of calls to __init__, sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(iccd), max(iccd.values()), min(iccd.values())))
                    points_earned = -10
                if (len(accd) != 20) or (any([v != 1 for v in accd.values()])):
                    incorrect = True
                    msgs.append("  Unexpected number of calls to addEvidence, sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(accd), max(accd.values()), min(accd.values())))
                    points_earned = -10
                if (len(qccd) != 20) or (any([v != 1 for v in qccd.values()])):
                    incorrect = True
                    msgs.append("  Unexpected number of calls to query, sum={} (should be 20), max={} (should be 1), min={} (should be 1)".format(len(qccd), max(qccd.values()), min(qccd.values())))
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append("  Exception calling BagLearner: {}".format(e))
                points_earned = -10
        if incorrect:
            inputs_str = "    data file: {}\n" \
                         "    permutation: {}".format(datafile, permutation)
            raise IncorrectOutput("Test failed on one or more output criteria.\n"
                                  "  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))
        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
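# The learner API this grader drives, as a stub: a constructor taking
# leaf_size/verbose, addEvidence(), query(), and author(). The mean predictor
# below is illustrative only; its constant output yields a NaN correlation,
# so a real RTLearner/DTLearner must actually fit the training data and
# respond to leaf_size to pass the checks above.
class _ExampleLearner(object):
    def __init__(self, leaf_size=1, verbose=False):
        self.leaf_size = leaf_size
        self.mean_y = 0.0

    def addEvidence(self, trainX, trainY):
        self.mean_y = float(np.mean(trainY))  # "train" by memorizing the mean

    def query(self, points):
        return np.full(points.shape[0], self.mean_y)  # constant prediction

    def author(self):
        return 'gburdell3'  # your GT username; None or 'tb34' is penalized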