Example #1
def test_strategy(description, insample_args, outsample_args, benchmark_type,
                  benchmark, impact, train_time, test_time, max_time, seed,
                  grader):
    """Test StrategyLearner.

    Requires test description, insample args (dict), outsample args (dict), benchmark_type (str), benchmark (float),
    impact (float), train/test/max time limits (seconds), a random seed, and a grader fixture.
    """
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'StrategyLearner' not in globals():
            import importlib
            m = importlib.import_module('StrategyLearner')
            globals()['StrategyLearner'] = m
        outsample_cr_to_beat = None
        if benchmark_type == 'clean':
            outsample_cr_to_beat = benchmark

        def timeoutwrapper_strategylearner():
            # Set fixed seed for repeatability

            np.random.seed(seed)
            random.seed(seed)
            learner = StrategyLearner.StrategyLearner(verbose=False,
                                                      impact=impact)

            tmp = time.time()

            learner.addEvidence(**insample_args)

            train_t = time.time() - tmp

            tmp = time.time()

            insample_trades_1 = learner.testPolicy(**insample_args)
            test_t = time.time() - tmp
            insample_trades_2 = learner.testPolicy(**insample_args)
            tmp = time.time()
            outsample_trades = learner.testPolicy(**outsample_args)
            out_test_t = time.time() - tmp

            return insample_trades_1, insample_trades_2, outsample_trades, train_t, test_t, out_test_t

        msgs = []

        in_trades_1, in_trades_2, out_trades, train_t, test_t, out_test_t = run_with_timeout(
            timeoutwrapper_strategylearner, max_time, (), {})

        incorrect = False
        if len(in_trades_1.shape) != 2 or in_trades_1.shape[1] != 1:
            incorrect = True
            msgs.append(
                "  First insample trades DF has invalid shape: {}".format(
                    in_trades_1.shape))
        elif len(in_trades_2.shape) != 2 or in_trades_2.shape[1] != 1:
            incorrect = True
            msgs.append(
                "  Second insample trades DF has invalid shape: {}".format(
                    in_trades_2.shape))
        elif len(out_trades.shape) != 2 or out_trades.shape[1] != 1:
            incorrect = True
            msgs.append(
                "  Out-of-sample trades DF has invalid shape: {}".format(
                    out_trades.shape))
        else:
            tmp_csum = 0.0
            for date, trade in in_trades_1.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True
                    msgs.append(
                        "  illegal trade in first insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}"
                        .format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date,
                                trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append(
                        "  holdings more than {} long or short in first insample DF. Date {}, Trade {}"
                        .format(MAX_HOLDINGS, date, trade))
                    break
            tmp_csum = 0.0
            for date, trade in in_trades_2.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True
                    msgs.append(
                        "  illegal trade in second insample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}"
                        .format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date,
                                trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append(
                        "  holdings more than {} long or short in second insample DF. Date {}, Trade {}"
                        .format(MAX_HOLDINGS, date, trade))
                    break
            tmp_csum = 0.0
            for date, trade in out_trades.iterrows():
                tmp_csum += trade.iloc[0]
                if (trade.iloc[0] != 0) and \
                   (trade.abs().iloc[0] != MAX_HOLDINGS) and \
                   (trade.abs().iloc[0] != 2 * MAX_HOLDINGS):
                    incorrect = True

                    msgs.append(
                        "  illegal trade in out-of-sample DF. abs(trade) not one of ({},{},{}).\n  Date {}, Trade {}"
                        .format(0, MAX_HOLDINGS, 2 * MAX_HOLDINGS, date,
                                trade))
                    break
                elif abs(tmp_csum) > MAX_HOLDINGS:
                    incorrect = True
                    msgs.append(
                        "  holdings more than {} long or short in out-of-sample DF. Date {}, Trade {}"
                        .format(MAX_HOLDINGS, date, trade))
                    break
            # if (((in_trades_1.abs()!=0) & (in_trades_1.abs()!=MAX_HOLDINGS) & (in_trades_1.abs()!=2*MAX_HOLDINGS)).any().any() or\
            #     ((in_trades_2.abs()!=0) & (in_trades_2.abs()!=MAX_HOLDINGS) & (in_trades_2.abs()!=2*MAX_HOLDINGS)).any().any() or\
            #     ((out_trades.abs()!=0)  & (out_trades.abs()!=MAX_HOLDINGS)  & (out_trades.abs()!=2*MAX_HOLDINGS)).any().any()):
            #     incorrect = True
            #     msgs.append("  illegal trade. abs(trades) not one of ({},{},{})".format(0,MAX_HOLDINGS,2*MAX_HOLDINGS))
            # if ((in_trades_1.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((in_trades_2.cumsum().abs()>MAX_HOLDINGS).any()[0]) or ((out_trades.cumsum().abs()>MAX_HOLDINGS).any()[0]):
            #     incorrect = True
            #     msgs.append("  holdings more than {} long or short".format(MAX_HOLDINGS))
        if not (incorrect):
            if train_t > train_time:
                incorrect = True
                msgs.append(
                    "  addEvidence() took {} seconds, max allowed {}".format(
                        train_t, train_time))
            else:
                points_earned += 1.0
            if test_t > test_time:
                incorrect = True
                msgs.append(
                    "  testPolicy() took {} seconds, max allowed {}".format(
                        test_t, test_time))
            else:
                points_earned += 2.0
            if not ((in_trades_1 == in_trades_2).all()[0]):
                incorrect = True
                mismatches = in_trades_1.join(in_trades_2,
                                              how='outer',
                                              lsuffix='1',
                                              rsuffix='2')
                mismatches = mismatches[
                    mismatches.iloc[:, 0] != mismatches.iloc[:, 1]]
                msgs.append(
                    "  consecutive calls to testPolicy() with same input did not produce same output:"
                )
                msgs.append("  Mismatched trades:\n {}".format(mismatches))
            else:
                points_earned += 2.0
            student_insample_cr = evalPolicy2(insample_args['symbol'],
                                              in_trades_1,
                                              insample_args['sv'],
                                              insample_args['sd'],
                                              insample_args['ed'],
                                              market_impact=impact,
                                              commission_cost=0.0)
            student_outsample_cr = evalPolicy2(outsample_args['symbol'],
                                               out_trades,
                                               outsample_args['sv'],
                                               outsample_args['sd'],
                                               outsample_args['ed'],
                                               market_impact=impact,
                                               commission_cost=0.0)
            if student_insample_cr <= benchmark:
                incorrect = True
                msgs.append(
                    "  in-sample return ({}) did not beat benchmark ({})".
                    format(student_insample_cr, benchmark))
            else:
                points_earned += 5.0
            if outsample_cr_to_beat is None:
                if out_test_t > test_time:
                    incorrect = True
                    msgs.append(
                        "  out-sample took {} seconds, max of {}".format(
                            out_test_t, test_time))
                else:
                    points_earned += 5.0
            else:
                if student_outsample_cr < outsample_cr_to_beat:
                    incorrect = True
                    msgs.append(
                        "  out-sample return ({}) did not beat benchmark ({})".
                        format(student_outsample_cr, outsample_cr_to_beat))
                else:
                    points_earned += 5.0
        if incorrect:
            inputs_str = "    insample_args: {}\n" \
                         "    outsample_args: {}\n" \
                         "    benchmark_type: {}\n" \
                         "    benchmark: {}\n" \
                         "    train_time: {}\n" \
                         "    test_time: {}\n" \
                         "    max_time: {}\n" \
                         "    seed: {}\n".format(insample_args, outsample_args, benchmark_type, benchmark, train_time, test_time, max_time,seed)
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(
                    inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]
                          )  # show only filename instead of long absolute path
        # tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
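
For reference, the commented-out block inside test_strategy hints at a vectorized version of the same per-row legality check. A minimal sketch of that idea, assuming trades come back as a single-column pandas DataFrame (the helper name and messages are illustrative, not part of the grader):

def trades_are_legal(trades, max_holdings):
    """Vectorized check: each trade must be 0, +/-max_holdings or
    +/-2*max_holdings, and cumulative holdings must stay within
    +/-max_holdings."""
    col = trades.iloc[:, 0]
    legal_sizes = col.abs().isin([0, max_holdings, 2 * max_holdings])
    if not legal_sizes.all():
        return False, "illegal trade size(s) on {}".format(list(col[~legal_sizes].index))
    if (col.cumsum().abs() > max_holdings).any():
        return False, "holdings exceed +/-{}".format(max_holdings)
    return True, ""
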
def test_learners(description, group, datafile, seed, outputs, grader):
    """Test that the ML learners return correct predictions.

    Requires test description, test case group, a data file, a random seed, expected outputs, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        learner_class = None
        kwargs = {'verbose': False}

        # (BPH) Copied from grade_strategy_qlearning.py
        # Set fixed seed for repeatability
        np.random.seed(seed)
        random.seed(seed)
        # Remove the ability to seed either np.random or python random
        tmp_numpy_seed = np.random.seed
        tmp_random_seed = random.seed
        np.random.seed = fake_seed
        random.seed = fake_rseed

        # Try to import KNNLearner (only once)
        # if not 'KNNLearner' in globals():
        #     from KNNLearner import KNNLearner
        if 'RTLearner' not in globals():
            from RTLearner import RTLearner
        if 'DTLearner' not in globals():
            from DTLearner import DTLearner
        if (group in ('BagLearner', 'InsaneLearner', 'RandomName')) and \
                ('BagLearner' not in globals()):
            from BagLearner import BagLearner
        # Put the seeds back for the moment
        np.random.seed = tmp_numpy_seed
        random.seed = tmp_random_seed
        # Tweak kwargs
        # kwargs.update(inputs.get('kwargs', {}))

        # Read separate training and testing data files
        # with open(inputs['train_file']) as f:
        # data_partitions=list()
        testX, testY, trainX, trainY = None, None, None, None
        permutation = None
        author = None
        with util.get_learner_data_file(datafile) as f:
            alldata = np.genfromtxt(f, delimiter=',')
            # Skip the date column and header row if we're working on Istanbul data
            if datafile == 'Istanbul.csv':
                alldata = alldata[1:, 1:]
            datasize = alldata.shape[0]
            cutoff = int(datasize * 0.6)
            permutation = np.random.permutation(alldata.shape[0])
            col_permutation = np.random.permutation(alldata.shape[1] - 1)
            train_data = alldata[permutation[:cutoff], :]
            # trainX = train_data[:,:-1]
            trainX = train_data[:, col_permutation]
            trainY = train_data[:, -1]
            test_data = alldata[permutation[cutoff:], :]
            # testX = test_data[:,:-1]
            testX = test_data[:, col_permutation]
            testY = test_data[:, -1]
        msgs = []

        if (group == "RTLearner") or (group == "DTLearner"):
            clss_name = RTLearner if group == "RTLearner" else DTLearner
            tree_sptc = 3 if group == "RTLearner" else 10
            corr_in, corr_out, corr_in_50 = None, None, None

            def oneleaf():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=1, verbose=False)
                learner.addEvidence(trainX, trainY)
                insample = learner.query(trainX)
                outsample = learner.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                author_rv = None
                try:
                    author_rv = learner.author()
                except Exception:
                    pass
                return insample, outsample, author_rv

            def fiftyleaves():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner = clss_name(leaf_size=50, verbose=False)
                learner.addEvidence(trainX, trainY)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return learner.query(trainX)

            predY_in, predY_out, author = run_with_timeout(
                oneleaf, tree_sptc, (), {})
            predY_in_50 = run_with_timeout(fiftyleaves, tree_sptc, (), {})
            corr_in = np.corrcoef(predY_in, y=trainY)[0, 1]
            corr_out = np.corrcoef(predY_out, y=testY)[0, 1]
            corr_in_50 = np.corrcoef(predY_in_50, y=trainY)[0, 1]
            incorrect = False

            if corr_in < outputs['insample_corr_min'] or np.isnan(corr_in):
                incorrect = True
                msgs.append(
                    "    In-sample with leaf_size=1 correlation less than allowed: got {} expected {}"
                    .format(corr_in, outputs['insample_corr_min']))
            else:
                points_earned += 1.0
            if corr_out < outputs['outsample_corr_min'] or np.isnan(corr_out):
                incorrect = True
                msgs.append(
                    "    Out-of-sample correlation less than allowed: got {} expected {}"
                    .format(corr_out, outputs['outsample_corr_min']))
            else:
                points_earned += 1.0
            if corr_in_50 > outputs['insample_corr_max'] or np.isnan(
                    corr_in_50):
                incorrect = True
                msgs.append(
                    "    In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}"
                    .format(corr_in_50, outputs['insample_corr_max']))
            else:
                points_earned += 1.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("    Invalid author: {}".format(author))
                points_earned += -2.0

        elif group == "BagLearner":
            corr1, corr20 = None, None
            bag_sptc = 10

            def onebag():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner1 = BagLearner(learner=RTLearner,
                                      kwargs={"leaf_size": 1},
                                      bags=1,
                                      boost=False,
                                      verbose=False)
                learner1.addEvidence(trainX, trainY)
                q_rv = learner1.query(testX)
                a_rv = learner1.author()
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv, a_rv

            def twentybags():
                np.random.seed(seed)
                random.seed(seed)
                np.random.seed = fake_seed
                random.seed = fake_rseed
                learner20 = BagLearner(learner=RTLearner,
                                       kwargs={"leaf_size": 1},
                                       bags=20,
                                       boost=False,
                                       verbose=False)
                learner20.addEvidence(trainX, trainY)
                q_rv = learner20.query(testX)
                np.random.seed = tmp_numpy_seed
                random.seed = tmp_random_seed
                return q_rv

            predY1, author = run_with_timeout(onebag,
                                              bag_sptc,
                                              pos_args=(),
                                              keyword_args={})
            predY20 = run_with_timeout(twentybags, bag_sptc, (), {})

            corr1 = np.corrcoef(predY1, testY)[0, 1]
            corr20 = np.corrcoef(predY20, testY)[0, 1]
            incorrect = False
            # msgs = []
            if corr20 <= corr1:
                incorrect = True
                msgs.append(
                    "    Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}"
                    .format(corr20, corr1))
            else:
                points_earned += 2.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("    Invalid author: {}".format(author))
                points_earned += -1.0
        elif group == "InsaneLearner":
            try:

                def insane():
                    import InsaneLearner as it
                    learner = it.InsaneLearner(verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)

                run_with_timeout(insane, 10, pos_args=(), keyword_args={})
                incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append(
                    "    Exception calling InsaneLearner: {}".format(e))
                points_earned = -10
        elif group == "RandomName":
            try:
                il_name, il_code = gen_class()
                exec(il_code, globals(), locals())
                il_cobj = eval(il_name)

                def rnd_name():
                    np.random.seed(seed)
                    random.seed(seed)
                    np.random.seed = fake_seed
                    random.seed = fake_rseed
                    learner = BagLearner(learner=il_cobj,
                                         kwargs={'verbose': False},
                                         bags=20,
                                         boost=False,
                                         verbose=False)
                    learner.addEvidence(trainX, trainY)
                    Y = learner.query(testX)
                    np.random.seed = tmp_numpy_seed
                    random.seed = tmp_random_seed
                    return il_cobj.init_callcount_dict, il_cobj.add_callcount_dict, il_cobj.query_callcount_dict

                iccd, accd, qccd = run_with_timeout(rnd_name,
                                                    10,
                                                    pos_args=(),
                                                    keyword_args={})
                incorrect = False
                if (len(iccd) != 20) or (any([v != 1 for v in iccd.values()])):
                    incorrect = True
                    msgs.append(
                        "    Unexpected number of calls to __init__, sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                        .format(len(iccd), max(iccd.values()),
                                min(iccd.values())))
                    points_earned = -10
                if (len(accd) != 20) or (any([v != 1 for v in accd.values()])):
                    incorrect = True
                    msgs.append(
                        "    Unexpected number of calls to addEvidence sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                        .format(len(accd), max(accd.values()),
                                min(accd.values())))
                    points_earned = -10
                if (len(qccd) != 20) or (any([v != 1 for v in qccd.values()])):
                    incorrect = True
                    msgs.append(
                        "    Unexpected number of calls to query, sum={} (should be 20), max={} (should be 1), min={} (should be 1)"
                        .format(len(qccd), max(qccd.values()),
                                min(qccd.values())))
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append("   Exception calling BagLearner: {}".format(e))
                points_earned = -10
        if incorrect:
            inputs_str = "    data file: {}\n" \
                         "    permutation: {}".format(datafile, permutation)
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(
                    inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]
                          )  # show only filename instead of long absolute path
        tb_list = [
            row for row in tb_list
            if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')
        ]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
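
fake_seed and fake_rseed are used above but not defined in this snippet; presumably they are no-op stand-ins that stop student code from re-seeding the random number generators mid-test. A minimal sketch under that assumption:

def fake_seed(*args, **kwargs):
    """No-op replacement for np.random.seed while a test runs."""
    pass


def fake_rseed(*args, **kwargs):
    """No-op replacement for random.seed while a test runs."""
    pass
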
Example #3
def test_qlearning(description, group, world_file, best_reward, median_reward, max_time, points, grader):
    points_earned = 0.0  # initialize points for this test case
    try:
        incorrect = True
        if 'QLearner' not in globals():
            import importlib
            m = importlib.import_module('QLearner')
            globals()['QLearner'] = m
        # Unpack test case
        world = np.array([list(map(float,s.strip().split(','))) for s in util.get_robot_world_file(world_file).readlines()])
        student_reward = None
        student_author = None
        msgs = []
        if group=='nodyna':
            def timeoutwrapper_nodyna():
                # Note: the following will NOT be commented out during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                                            num_actions = 4, \
                                            alpha = 0.2, \
                                            gamma = 0.9, \
                                            rar = 0.98, \
                                            radr = 0.999, \
                                            dyna = 0, \
                                            verbose=False)
                return qltest(worldmap=world,iterations=500,max_steps=10000,learner=learner,verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_nodyna,max_time,(),{})
            incorrect = False
            if student_reward < 1.5*median_reward:
                incorrect = True
                msgs.append("   Reward too low, expected %s, found %s"%(median_reward,student_reward))
        elif group=='dyna':
            def timeoutwrapper_dyna():
                # Note: the following will NOT be commented out during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                                            num_actions = 4, \
                                            alpha = 0.2, \
                                            gamma = 0.9, \
                                            rar = 0.5, \
                                            radr = 0.99, \
                                            dyna = 200, \
                                            verbose=False)
                return qltest(worldmap=world,iterations=50,max_steps=10000,learner=learner,verbose=False)
            student_reward = run_with_timeout(timeoutwrapper_dyna,max_time,(),{})
            incorrect = False
            if student_reward < 1.5*median_reward:
                incorrect = True
                msgs.append("   Reward too low, expected %s, found %s"%(median_reward,student_reward))
        elif group=='author':
            points_earned = -20
            def timeoutwrapper_author():
                # Note: the following will NOT be commented out during final grading
                # random.seed(robot_qlearning_testing_seed)
                # np.random.seed(robot_qlearning_testing_seed)
                learner = QLearner.QLearner(num_states=100,\
                                            num_actions = 4, \
                                            alpha = 0.2, \
                                            gamma = 0.9, \
                                            rar = 0.98, \
                                            radr = 0.999, \
                                            dyna = 0, \
                                            verbose=False)
                return learner.author()
            student_author = run_with_timeout(timeoutwrapper_author,max_time,(),{})
            student_reward = best_reward+1
            incorrect = False
            if (student_author is None) or (student_author=='tb34'):
                incorrect = True
                msgs.append("   author() method not implemented correctly. Found {}".format(student_author))
            else:
                points_earned = points
        if (not incorrect):
            points_earned += points
        if incorrect:
            inputs_str = "    group: {}\n" \
                         "    world_file: {}\n"\
                         "    median_reward: {}\n".format(group, world_file, median_reward)
            raise IncorrectOutput("Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] in ['QLearner.py','StrategyLearner.py']]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
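
The grader above only constructs the learner and defers the grid-world simulation to qltest (not shown). The constructor signature it relies on, together with the query/querysetstate/author methods a robot-navigation harness would typically call, might look like this skeleton (the method bodies and author string are placeholders, not the graded implementation):

class QLearner(object):
    """Skeleton of the interface exercised by the grader above."""

    def __init__(self, num_states=100, num_actions=4, alpha=0.2, gamma=0.9,
                 rar=0.98, radr=0.999, dyna=0, verbose=False):
        self.num_states = num_states
        self.num_actions = num_actions

    def querysetstate(self, s):
        """Set the current state and return an action without updating the Q-table."""
        raise NotImplementedError

    def query(self, s_prime, r):
        """Update the Q-table for the last (state, action) pair and return the next action."""
        raise NotImplementedError

    def author(self):
        return "student_id"  # placeholder
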
Example #4
def test_analysis(inputs, outputs, description, grader):
    """Test get_portfolio_value() and get_portfolio_stats() return correct values.

    Requires test inputs, expected outputs, description, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod

        # Unpack test case
        start_date_str = inputs['start_date'].split('-')
        start_date = datetime.datetime(int(start_date_str[0]),int(start_date_str[1]),int(start_date_str[2]))
        end_date_str = inputs['end_date'].split('-')
        end_date = datetime.datetime(int(end_date_str[0]),int(end_date_str[1]),int(end_date_str[2]))
        symbols = list(inputs['symbol_allocs'].keys())  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']
        allocs = list(inputs['symbol_allocs'].values())  # e.g.: [0.2, 0.3, 0.4, 0.1]
        start_val = inputs['start_val']
        risk_free_rate = inputs.get('risk_free_rate',0.0)

        # the wonky unpacking here is so that we only pull out the values we say we'll test.
        def timeoutwrapper_analysis():
            student_rv = analysis.assess_portfolio(\
                    sd=start_date, ed=end_date,\
                    syms=symbols,\
                    allocs=allocs,\
                    sv=start_val, rfr=risk_free_rate, sf=252.0, \
                    gen_plot=False)
            return student_rv
        result = run_with_timeout(timeoutwrapper_analysis,max_seconds_per_call,(),{})
        student_cr = result[0]
        student_adr = result[1]
        student_sr = result[3]
        port_stats = OrderedDict([('cum_ret',student_cr), ('avg_daily_ret',student_adr), ('sharpe_ratio',student_sr)])
        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        for key, value in port_stats.items():
            if abs(value - outputs[key]) > abs_margins[key]:
                incorrect = True
                msgs.append("    {}: {} (expected: {})".format(key, value, outputs[key]))
            else:
                points_earned += points_per_output[key]  # partial credit

        if incorrect:
            inputs_str = "    start_date: {}\n" \
                         "    end_date: {}\n" \
                         "    symbols: {}\n" \
                         "    allocs: {}\n" \
                         "    start_val: {}".format(start_date, end_date, symbols, allocs, start_val)
            raise IncorrectOutput(
                "One or more stats were incorrect.\n  Inputs:\n{}\n  Wrong values:\n{}".format(
                    inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'analysis.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
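
assess_portfolio itself is not shown; the three statistics checked above (cum_ret, avg_daily_ret, sharpe_ratio) are commonly computed from a daily portfolio-value Series roughly as follows (a sketch of one common formulation, not necessarily the graded code):

import numpy as np

def portfolio_stats(port_val, rfr=0.0, sf=252.0):
    """Cumulative return, average daily return, std of daily returns and
    Sharpe ratio from a pandas Series of daily portfolio values."""
    daily_ret = port_val / port_val.shift(1) - 1.0
    daily_ret = daily_ret.iloc[1:]  # drop the first (NaN) entry
    cum_ret = port_val.iloc[-1] / port_val.iloc[0] - 1.0
    avg_daily_ret = daily_ret.mean()
    std_daily_ret = daily_ret.std()
    sharpe_ratio = np.sqrt(sf) * (avg_daily_ret - rfr) / std_daily_ret
    return cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio
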
Example #5
def test_learners(
    description,
    group,
    max_tests,
    needed_wins,
    row_limits,
    col_limits,
    seed,
    grader,
):
    """Test that the data generation methods beat the given learner.

    Requires test description, test case group, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    incorrect = True
    msgs = []
    try:
        data_x, data_y = None, None
        same_data_x, same_data_y = None, None
        diff_data_x, diff_data_y = None, None
        better_learner, worse_learner = None, None
        if group == "author":
            try:
                from gen_data import author

                auth_string = run_with_timeout(author, seconds_per_test_case,
                                               (), {})
                if auth_string == "tb34":
                    incorrect = True
                    msgs.append("   Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == "":
                    incorrect = True
                    msgs.append("   Empty author name")
                    points_earned = -10
                else:
                    incorrect = False
            except Exception as e:
                incorrect = True
                msgs.append(
                    "   Exception occurred when calling author() method: {}".
                    format(e))
                points_earned = -10
        else:
            if group == "best4dt":
                from gen_data import best_4_dt

                data_x, data_y = run_with_timeout(best_4_dt,
                                                  seconds_per_test_case, (),
                                                  {"seed": seed})
                same_data_x, same_data_y = run_with_timeout(
                    best_4_dt, seconds_per_test_case, (), {"seed": seed})
                diff_data_x, diff_data_y = run_with_timeout(
                    best_4_dt, seconds_per_test_case, (), {"seed": seed + 1})
                better_learner = DTLearner
                worse_learner = LinRegLearner
            elif group == "best4lr":
                from gen_data import best_4_lin_reg

                data_x, data_y = run_with_timeout(best_4_lin_reg,
                                                  seconds_per_test_case, (),
                                                  {"seed": seed})
                same_data_x, same_data_y = run_with_timeout(
                    best_4_lin_reg, seconds_per_test_case, (), {"seed": seed})
                diff_data_x, diff_data_y = run_with_timeout(
                    best_4_lin_reg, seconds_per_test_case, (),
                    {"seed": seed + 1})
                better_learner = LinRegLearner
                worse_learner = DTLearner

            num_samples = data_x.shape[0]
            cutoff = int(num_samples * 0.6)
            worse_better_err = []
            for run in range(max_tests):
                permutation = np.random.permutation(num_samples)
                train_x, train_y = (
                    data_x[permutation[:cutoff]],
                    data_y[permutation[:cutoff]],
                )
                test_x, test_y = (
                    data_x[permutation[cutoff:]],
                    data_y[permutation[cutoff:]],
                )
                better = better_learner()
                worse = worse_learner()
                better.add_evidence(train_x, train_y)
                worse.add_evidence(train_x, train_y)
                better_pred = better.query(test_x)
                worse_pred = worse.query(test_x)
                better_err = np.linalg.norm(test_y - better_pred)
                worse_err = np.linalg.norm(test_y - worse_pred)
                worse_better_err.append((worse_err, better_err))
            worse_better_err.sort(key=functools.cmp_to_key(lambda a, b: int(
                (b[0] - b[1]) - (a[0] - a[1]))))
            better_wins_count = 0
            for worse_err, better_err in worse_better_err:
                if better_err < 0.9 * worse_err:
                    better_wins_count = better_wins_count + 1
                    points_earned += 5.0
                if better_wins_count >= needed_wins:
                    break
            incorrect = False
            if (data_x.shape[0] < row_limits[0]) or (data_x.shape[0] >
                                                     row_limits[1]):
                incorrect = True
                msgs.append("    Invalid number of rows. Should be between {},"
                            " found {}".format(row_limits, data_x.shape[0]))
                points_earned = max(0, points_earned - 20)
            if (data_x.shape[1] < col_limits[0]) or (data_x.shape[1] >
                                                     col_limits[1]):
                incorrect = True
                msgs.append(
                    "    Invalid number of columns. Should be between {},"
                    " found {}".format(col_limits, data_x.shape[1]))
                points_earned = max(0, points_earned - 20)
            if better_wins_count < needed_wins:
                incorrect = True
                msgs.append(
                    "    Better learner did not exceed worse learner. Expected"
                    " {}, found {}".format(needed_wins, better_wins_count))
            if not (np.array_equal(same_data_y, data_y)) or not (
                    np.array_equal(same_data_x, data_x)):
                incorrect = True
                msgs.append(
                    "    Did not produce the same data with the same seed.\n" +
                    "      First data_x:\n{}\n".format(data_x) +
                    "      Second data_x:\n{}\n".format(same_data_x) +
                    "      First data_y:\n{}\n".format(data_y) +
                    "      Second data_y:\n{}\n".format(same_data_y))
                points_earned = max(0, points_earned - 20)
            if np.array_equal(diff_data_y, data_y) and np.array_equal(
                    diff_data_x, data_x):
                incorrect = True
                msgs.append("    Did not produce different data with different"
                            " seeds.\n" +
                            "      First data_x:\n{}\n".format(data_x) +
                            "      Second data_x:\n{}\n".format(diff_data_x) +
                            "      First data_y:\n{}\n".format(data_y) +
                            "      Second data_y:\n{}\n".format(diff_data_y))
                points_earned = max(0, points_earned - 20)
        if incorrect:
            if group == "author":
                raise IncorrectOutput(
                    "Test failed on one or more criteria.\n  {}".format(
                        "\n".join(msgs)))
            else:
                inputs_str = "    Residuals: {}".format(worse_better_err)
                raise IncorrectOutput(
                    "Test failed on one or more output criteria.\n "
                    " Inputs:\n{}\n  Failures:\n{}".format(
                        inputs_str, "\n".join(msgs)))
        else:
            if group != "author":
                avg_ratio = 0.0
                worse_better_err.sort(key=functools.cmp_to_key(
                    lambda a, b: int(np.sign((b[0] - b[1]) - (a[0] - a[1])))))
                for we, be in worse_better_err[:10]:
                    avg_ratio += float(we) - float(be)
                avg_ratio = avg_ratio / 10.0
                if group == "best4dt":
                    grader.add_performance(np.array([avg_ratio, 0]))
                else:
                    grader.add_performance(np.array([0, avg_ratio]))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (
                os.path.basename(row[0]),
                row[1],
                row[2],
                row[3],
            )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if (row[0] == "gen_data.py")]
        if tb_list:
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(tb_list))  # contains newlines
        elif "grading_traceback" in dir(e):
            msg += "Traceback:\n"
            msg += "".join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome="failed", points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome="passed", points=points_earned, msg=None))
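
For context, a toy gen_data generator of the kind this grader exercises: data on which a linear model should beat a decision tree. The shapes, coefficients and default seed below are illustrative only; the row/column limits actually enforced come from the test configuration.

import numpy as np

def best_4_lin_reg(seed=0):
    """Toy example: y is a noisy linear function of x, so LinRegLearner
    should out-predict DTLearner on held-out rows."""
    rng = np.random.RandomState(seed)
    x = rng.uniform(-10.0, 10.0, size=(500, 4))
    y = x.dot(np.array([2.0, -1.0, 0.5, 3.0])) + rng.normal(0.0, 0.1, size=500)
    return x, y
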
Example #6
def test_optimization(inputs, outputs, description, grader):
    """Test that find_optimal_allocations() returns correct allocations.

    Requires test inputs, expected outputs, description, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            # * Import methods to test (refactored out, spring 2016, --BPH)
            # for m in ['find_optimal_allocations']:
            #     globals()[m] = getattr(mod, m)

        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        # Read in adjusted closing prices for given symbols, date range
        # dates = pd.date_range(start_date, end_date)
        # prices_all = get_data(symbols, dates)  # automatically adds SPY
        # prices = prices_all[symbols]  # only portfolio symbols

        # Run student code with time limit (in seconds, per test case)
        port_stats = {}
        with time_limit(seconds_per_test_case):
            # * Find optimal allocations
            student_allocs, student_cr, student_adr, student_sddr, student_sr = optimization.optimize_portfolio(
                sd=start_date, ed=end_date, syms=symbols, gen_plot=False)
            student_allocs = np.float32(
                student_allocs
            )  # make sure it's a NumPy array, for easier computation

        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']

        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append("    sum of allocations: {} (expected: 1.0)".format(
                sum_allocs))
            student_allocs = student_allocs / sum_allocs  # normalize allocations, if they don't sum to 1.0
        else:
            points_earned += points_per_component['sum_to_one']

        # * Get daily portfolio value and statistics, for comparison
        #port_val = get_portfolio_value(prices, allocs, start_val)
        #cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

        # * Check alloc_range: Each allocation is within [0.0, 1.0] +/- margin
        # * Check alloc_match: Each allocation matches expected value +/- margin
        points_per_alloc_range = points_per_component['alloc_range'] / len(
            correct_allocs)
        points_per_alloc_match = points_per_component['alloc_match'] / len(
            correct_allocs)
        for symbol, alloc, correct_alloc in zip(symbols, student_allocs,
                                                correct_allocs):
            if alloc < -abs_margins['alloc_range'] or alloc > (
                    1.0 + abs_margins['alloc_range']):
                incorrect = True
                msgs.append(
                    "    {} - allocation out of range: {} (expected: [0.0, 1.0])"
                    .format(symbol, alloc))
            else:
                points_earned += points_per_alloc_range
                if abs(alloc - correct_alloc) > abs_margins['alloc_match']:
                    incorrect = True
                    msgs.append(
                        "    {} - incorrect allocation: {} (expected: {})".
                        format(symbol, alloc, correct_alloc))
                else:
                    points_earned += points_per_alloc_match
        #points_earned = round(points_earned)  # round off points earned to nearest integer (?)

        if incorrect:
            inputs_str = "    start_date: {}\n" \
                         "    end_date: {}\n" \
                         "    symbols: {}\n".format(start_date, end_date, symbols)
            # If there are problems with the stats and all of the values returned match the template code, exactly, then award 0 points
            #if check_template(student_allocs, student_cr, student_adr, student_sddr, student_sr):
            points_earned = 0
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}"
                .format(inputs_str, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]
                          )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'optimization.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
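
optimization.optimize_portfolio is imported elsewhere; one common way to implement the allocation search being graded is a constrained SciPy minimization of the negative Sharpe ratio. A sketch under that assumption (the helper name and the 252-day annualization are illustrative):

import numpy as np
import scipy.optimize as spo

def optimal_allocs(prices):
    """Find allocations (each in [0, 1], summing to 1) that maximize the
    Sharpe ratio of the resulting daily portfolio value."""
    n = prices.shape[1]
    normed = prices / prices.iloc[0]

    def neg_sharpe(allocs):
        port_val = (normed * allocs).sum(axis=1)
        daily_ret = port_val.pct_change().iloc[1:]
        return -np.sqrt(252.0) * daily_ret.mean() / daily_ret.std()

    guess = np.full(n, 1.0 / n)
    bounds = [(0.0, 1.0)] * n
    constraints = ({'type': 'eq', 'fun': lambda a: np.sum(a) - 1.0},)
    result = spo.minimize(neg_sharpe, guess, method='SLSQP',
                          bounds=bounds, constraints=constraints)
    return result.x
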
Example #7
def test_marketsim(description, group, inputs, outputs, grader):
    """Test compute_portvals() returns correct daily portfolio values.

    Requires test description, test case group, inputs, expected outputs, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            # * Import methods to test
            for m in ['compute_portvals']:
                globals()[m] = getattr(mod, m)

        incorrect = False
        msgs = []

        if group == 'author':
            try:
                auth_string = run_with_timeout(marketsim.author,
                                               seconds_per_test_case, (), {})
                if auth_string == 'tb34':
                    incorrect = True
                    msgs.append("   Incorrect author name (tb34)")
                    points_earned = -10
                elif auth_string == '':
                    incorrect = True
                    msgs.append("   Empty author name")
                    points_earned = -10
            except Exception as e:
                incorrect = True
                msgs.append(
                    "   Exception occurred when calling author() method: {}".
                    format(e))
                points_earned = -10
        else:
            # Unpack test case
            orders_file = inputs['orders_file']
            start_val = inputs['start_val']
            impct = inputs['impact']
            commish = inputs['commission']

            portvals = None
            fullpath_orders_file = get_orders_data_file(orders_file)
            portvals = run_with_timeout(
                compute_portvals, seconds_per_test_case, (), {
                    'orders_file': fullpath_orders_file,
                    'start_val': start_val,
                    'commission': commish,
                    'impact': impct
                })

            # * Check return type is correct, coax into Series
            assert (type(portvals) == pd.Series) or (
                type(portvals) == pd.DataFrame and len(portvals.columns) == 1
            ), "You must return a Series or single-column DataFrame!"
            if type(portvals) == pd.DataFrame:
                portvals = portvals[portvals.columns[0]]  # convert single-column DataFrame to Series
            if group == 'basic':
                if len(portvals) != outputs['num_days']:
                    incorrect = True
                    msgs.append(
                        "   Incorrect number of days: {}, expected {}".format(
                            len(portvals), outputs['num_days']))
                else:
                    points_earned += 2.0
                if abs(portvals.iloc[-1] - outputs['last_day_portval']) > (
                        0.001 * outputs['last_day_portval']):
                    incorrect = True
                    msgs.append(
                        "   Incorrect final value: {}, expected {}".format(
                            portvals.iloc[-1], outputs['last_day_portval']))
                else:
                    points_earned += 5.0
                adr, sr = get_stats(portvals)
                if abs(sr - outputs['sharpe_ratio']) > abs(
                        0.001 * outputs['sharpe_ratio']):
                    incorrect = True
                    msgs.append(
                        "   Incorrect sharpe ratio: {}, expected {}".format(
                            sr, outputs['sharpe_ratio']))
                else:
                    points_earned += 1.0
                if abs(adr - outputs['avg_daily_ret']) > abs(
                        0.001 * outputs['avg_daily_ret']):
                    incorrect = True
                    msgs.append(
                        "   Incorrect avg daily return: {}, expected {}".
                        format(adr, outputs['avg_daily_ret']))
                else:
                    points_earned += 1.0
            elif group == 'commission' or group == 'impact' or group == 'both':
                if abs(portvals.iloc[-1] - outputs['last_day_portval']) > 0.001:
                    incorrect = True
                    msgs.append(
                        "   Incorrect final value: {}, expected {}".format(
                            portvals.iloc[-1], outputs['last_day_portval']))
                else:
                    points_earned += 2.0
        if incorrect:
            raise IncorrectOutput(
                "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(
                    inputs, "\n".join(msgs)))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)

        tb_list = tb.extract_tb(sys.exc_info()[2])
        if 'grading_traceback' in dir(e):
            tb_list = e.grading_traceback
        for i in range(len(tb_list)):
            row = tb_list[i]
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3]
                          )  # show only filename instead of long absolute path
        tb_list = [row for row in tb_list if row[0] == 'marketsim_old.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, str(e))

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed',
                        points=max(points_earned, 0),
                        msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
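
run_with_timeout(func, timeout, pos_args, keyword_args) is used throughout these examples but never defined here. A minimal Unix-only sketch of such a helper using SIGALRM (the real grading harness may well implement this differently, e.g. in a separate process):

import signal

class TimeoutException(Exception):
    pass

def run_with_timeout(func, timeout_seconds, pos_args, keyword_args):
    """Run func(*pos_args, **keyword_args), raising TimeoutException if it
    does not return within timeout_seconds (requires SIGALRM, i.e. Unix)."""
    def handler(signum, frame):
        raise TimeoutException("{} timed out after {} seconds".format(
            getattr(func, '__name__', 'callable'), timeout_seconds))

    old_handler = signal.signal(signal.SIGALRM, handler)
    signal.alarm(int(timeout_seconds))
    try:
        return func(*pos_args, **keyword_args)
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
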
def test_learners(description, group, max_tests, needed_wins, row_limits,
                  col_limits, seed, grader):
    """Test that the data generation methods beat the given learner.

    Requires test description, test case group, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    incorrect = True
    try:
        # Try to import KNNLearner (only once)
        # if not 'KNNLearner' in globals():
        #     from KNNLearner import KNNLearner
        dataX, dataY = None, None
        same_dataX, same_dataY = None, None
        diff_dataX, diff_dataY = None, None
        betterLearner, worseLearner = None, None
        if group == "best4rt":
            from gen_data import best4RT
            dataX, dataY = run_with_timeout(best4RT, seconds_per_test_case, (),
                                            {'seed': seed})
            same_dataX, same_dataY = run_with_timeout(best4RT,
                                                      seconds_per_test_case,
                                                      (), {'seed': seed})
            diff_dataX, diff_dataY = run_with_timeout(best4RT,
                                                      seconds_per_test_case,
                                                      (), {'seed': seed + 1})
            betterLearner = RTLearner
            worseLearner = LinRegLearner
        else:
            from gen_data import best4LinReg
            dataX, dataY = run_with_timeout(best4LinReg, seconds_per_test_case,
                                            (), {'seed': seed})
            same_dataX, same_dataY = run_with_timeout(best4LinReg,
                                                      seconds_per_test_case,
                                                      (), {'seed': seed})
            diff_dataX, diff_dataY = run_with_timeout(best4LinReg,
                                                      seconds_per_test_case,
                                                      (), {'seed': seed + 1})
            betterLearner = LinRegLearner
            worseLearner = RTLearner

        num_samples = dataX.shape[0]
        cutoff = int(num_samples * 0.6)
        worse_better_err = []
        for run in range(max_tests):
            permutation = np.random.permutation(num_samples)
            train_X, train_Y = dataX[permutation[:cutoff]], dataY[
                permutation[:cutoff]]
            test_X, test_Y = dataX[permutation[cutoff:]], dataY[
                permutation[cutoff:]]
            better = betterLearner()
            worse = worseLearner()
            better.addEvidence(train_X, train_Y)
            worse.addEvidence(train_X, train_Y)
            better_pred = better.query(test_X)
            worse_pred = worse.query(test_X)
            better_err = np.linalg.norm(test_Y - better_pred)
            worse_err = np.linalg.norm(test_Y - worse_pred)
            worse_better_err.append((worse_err, better_err))
        # Sort trials so the largest (worse_err - better_err) gaps come first
        worse_better_err.sort(key=lambda errs: errs[0] - errs[1], reverse=True)
        better_wins_count = 0
        for worse_err, better_err in worse_better_err:
            if better_err < 0.9 * worse_err:
                better_wins_count = better_wins_count + 1
                points_earned += 5.0
            if better_wins_count >= needed_wins:
                break
        incorrect = False
        msgs = []
        if (dataX.shape[0] < row_limits[0]) or (dataX.shape[0] >
                                                row_limits[1]):
            incorrect = True
            msgs.append(
                "    Invalid number of rows. Should be between {}, found {}".
                format(row_limits, dataX.shape[0]))
            points_earned = max(0, points_earned - 20)
        if (dataX.shape[1] < col_limits[0]) or (dataX.shape[1] >
                                                col_limits[1]):
            incorrect = True
            msgs.append(
                "    Invalid number of columns. Should be between {}, found {}"
                .format(col_limits, dataX.shape[1]))
            points_earned = max(0, points_earned - 20)
        if better_wins_count < needed_wins:
            incorrect = True
            msgs.append(
                "    Better learner did not exceed worse learner. Expected {}, found {}"
                .format(needed_wins, better_wins_count))
        if not (np.array_equal(same_dataY, dataY)) or not (np.array_equal(
                same_dataX, dataX)):
            incorrect = True
            msgs.append("    Did not produce the same data with the same seed.\n"+\
                        "      First dataX:\n{}\n".format(dataX)+\
                        "      Second dataX:\n{}\n".format(same_dataX)+\
                        "      First dataY:\n{}\n".format(dataY)+\
                        "      Second dataY:\n{}\n".format(same_dataY))
            points_earned = max(0, points_earned - 20)
        if np.array_equal(diff_dataY, dataY) and np.array_equal(
                diff_dataX, dataX):
            incorrect = True
            msgs.append("    Did not produce different data with different seeds.\n"+\
                        "      First dataX:\n{}\n".format(dataX)+\
                        "      Second dataX:\n{}\n".format(diff_dataX)+\
                        "      First dataY:\n{}\n".format(dataY)+\
                        "      Second dataY:\n{}\n".format(diff_dataY))
            points_earned = max(0, points_earned - 20)
        if incorrect:
            inputs_str = "    Residuals: {}".format(worse_better_err)
            raise IncorrectOutput, "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(
                inputs_str, "\n".join(msgs))
        else:
            avg_ratio = 0.0
            # Re-sort by the (worse_err - better_err) gap, largest gap first
            worse_better_err.sort(key=lambda errs: errs[0] - errs[1], reverse=True)
            for we, be in worse_better_err[:10]:
                avg_ratio += (float(we) - float(be))
            avg_ratio = avg_ratio / 10.0
            if group == "best4rt":
                grader.add_performance(np.array([avg_ratio, 0]))
            else:
                grader.add_performance(np.array([0, avg_ratio]))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            # Show only the filename instead of the long absolute path
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])
        tb_list = [row for row in tb_list if (row[0] == 'gen_data.py')]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        elif 'grading_traceback' in dir(e):
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(e.grading_traceback))
        msg += "{}: {}".format(e.__class__.__name__, e.message)

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
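
# Note: the gen_data functions imported in the test above are student code; the
# grader only checks shapes, seeding behaviour, and that the intended learner
# wins. The sketch below shows one way a best4LinReg could satisfy those checks;
# it is illustrative only, not the graded reference implementation.
import numpy as np

def best4LinReg(seed=5):
    """Generate a dataset on which LinRegLearner should beat RTLearner."""
    np.random.seed(seed)                      # identical seed => identical data
    rows, cols = 100, 4                       # keep within the row/column limits
    X = np.random.random((rows, cols)) * 200 - 100
    weights = np.random.random(cols)
    Y = X.dot(weights)                        # Y is exactly linear in X
    return X, Y
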
Example #9
def test_optimization(inputs, outputs, description, seed, grader):
    """Test find_optimal_allocations() returns correct allocations.

    Requires test inputs, expected outputs, description, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        # Try to import student code (only once)
        if main_code not in globals():
            import importlib
            # * Import module
            nprs_func = np.random.seed
            rs_func = random.seed
            np.random.seed = fake_seed
            random.seed = fake_seed
            mod = importlib.import_module(main_code)
            globals()[main_code] = mod
            np.random.seed = nprs_func
            random.seed = rs_func

        # Unpack test case
        start_date = inputs['start_date']
        end_date = inputs['end_date']
        symbols = inputs['symbols']  # e.g.: ['GOOG', 'AAPL', 'GLD', 'XOM']

        def timeoutwrapper_optimize():
            np.random.seed(seed)
            random.seed(seed)
            nprs_func = np.random.seed
            rs_func = random.seed
            np.random.seed = fake_seed
            random.seed = fake_seed
            s_allocs, s_cr, s_adr, s_sddr, s_sr = optimization.optimize_portfolio(sd=start_date, ed=end_date, syms=symbols, gen_plot=False)
            s_allocs = np.float32(s_allocs)
            np.random.seed = nprs_func
            random.seed = rs_func
            return s_allocs
        student_allocs = run_with_timeout(timeoutwrapper_optimize, seconds_per_test_case, (), {})

        # Verify against expected outputs and assign points
        incorrect = False
        msgs = []
        correct_allocs = outputs['allocs']
        benchmark_value = outputs['benchmark']

        # * Check sum_to_one: Allocations sum to 1.0 +/- margin
        sum_allocs = np.sum(student_allocs)
        if abs(sum_allocs - 1.0) > abs_margins['sum_to_one']:
            incorrect = True
            msgs.append("    sum of allocations: {} (expected: 1.0)".format(sum_allocs))
            student_allocs = student_allocs / sum_allocs  # normalize allocations, if they don't sum to 1.0
        else:
            points_earned += points_per_component['sum_to_one']

        points_per_alloc_range = points_per_component['alloc_range'] / len(correct_allocs)
        for symbol, alloc in zip(symbols,student_allocs):
            if alloc < -abs_margins['alloc_range'] or alloc > (1.0+abs_margins['alloc_range']):
                incorrect = True
                msgs.append("    {} - allocation out of range: {} (expected [0.0, 1.0)".format(symbol,alloc))
            else:
                points_earned += points_per_alloc_range
        student_allocs_sddr = alloc2sddr(student_allocs,inputs)
        if student_allocs_sddr/benchmark_value - 1.0 > abs_margins['sddr_match']:
            incorrect = True
            msgs.append("    Sddr too large: {} (expected < {} + {})".format(student_allocs_sddr, benchmark_value, benchmark_value*abs_margins['sddr_match']))
        else:
            points_earned += points_per_component['benchmark_match']

        if incorrect:
            inputs_str = "    start_date: {}\n" \
                         "    end_date: {}\n" \
                         "    symbols: {}\n".format(start_date, end_date, symbols)
            raise IncorrectOutput, "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(inputs_str, "\n".join(msgs))
    except Exception as e:
        # Test result: failed
        msg = "Test case description: {}\n".format(description)
        
        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            # Show only the filename instead of the long absolute path
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])
        tb_list = [row for row in tb_list if row[0] == 'optimization.py']
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, e.message)

        # Report failure result to grader, with stacktrace
        grader.add_result(GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(GradeResult(outcome='passed', points=points_earned, msg=None))
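
# Note: fake_seed is referenced above but not defined in this excerpt. It is
# presumably a no-op stand-in that is temporarily swapped over np.random.seed and
# random.seed so student code cannot re-fix the RNG seed; a plausible stub:
def fake_seed(*args, **kwargs):
    # Silently ignore any attempt to re-seed while student code runs
    pass
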
Example #10
def test_learners(description, group, datafile, seed, outputs, grader):
    """Test ML models returns correct predictions.

    Requires test description, test case group, inputs, expected outputs, and a grader fixture.
    """

    points_earned = 0.0  # initialize points for this test case
    try:
        learner_class = None
        kwargs = {'verbose': False}

        # (BPH) Copied from grade_strategy_qlearning.py
        # Set fixed seed for repeatability
        np.random.seed(seed)
        random.seed(seed)
        # These lines will be uncommented in the batch grader to
        # prevent accidentally fixing the seed within student
        # code
        # tmp_numpy_seed = np.random.seed
        # tmp_random_seed = random.seed
        # np.random.seed = fake_seed
        # random.seed = fake_rseed

        # Try to import KNNLearner (only once)
        # if not 'KNNLearner' in globals():
        #     from KNNLearner import KNNLearner
        if 'RTLearner' not in globals():
            from RTLearner import RTLearner
        if group == 'BagLearner' and 'BagLearner' not in globals():
            from BagLearner import BagLearner

        # Tweak kwargs
        # kwargs.update(inputs.get('kwargs', {}))

        # Read separate training and testing data files
        # with open(inputs['train_file']) as f:
        # data_partitions=list()
        testX, testY, trainX, trainY = None, None, None, None
        permutation = None
        author = None
        with util.get_learner_data_file(datafile) as f:
            alldata = np.genfromtxt(f, delimiter=',')
            # Skip the date column and header row if we're working on Istanbul data
            if datafile == 'Istanbul.csv':
                alldata = alldata[1:, 1:]
            datasize = alldata.shape[0]
            cutoff = int(datasize * 0.6)
            permutation = np.random.permutation(alldata.shape[0])
            col_permutation = np.random.permutation(alldata.shape[1] - 1)
            train_data = alldata[permutation[:cutoff], :]
            # trainX = train_data[:,:-1]
            trainX = train_data[:, col_permutation]
            trainY = train_data[:, -1]
            test_data = alldata[permutation[cutoff:], :]
            # testX = test_data[:,:-1]
            testX = test_data[:, col_permutation]
            testY = test_data[:, -1]

        if group is "RTLearner":
            corr_in, corr_out, corr_in_50 = None, None, None

            def oneleaf():
                learner = RTLearner(leaf_size=1, verbose=False)
                learner.addEvidence(trainX, trainY)
                insample = learner.query(trainX)
                outsample = learner.query(testX)
                return insample, outsample, learner.author()

            def fiftyleaves():
                learner = RTLearner(leaf_size=50, verbose=False)
                learner.addEvidence(trainX, trainY)
                return learner.query(trainX)

            predY_in, predY_out, author = run_with_timeout(
                oneleaf, seconds_per_test_case, (), {})
            predY_in_50 = run_with_timeout(fiftyleaves, seconds_per_test_case,
                                           (), {})
            corr_in = np.corrcoef(predY_in, y=trainY)[0, 1]
            corr_out = np.corrcoef(predY_out, y=testY)[0, 1]
            corr_in_50 = np.corrcoef(predY_in_50, y=trainY)[0, 1]
            incorrect = False

            msgs = []
            if corr_in < outputs['insample_corr_min']:
                incorrect = True
                msgs.append(
                    "    In-sample with leaf_size=1 correlation less than allowed: got {} expected {}"
                    .format(corr_in, outputs['insample_corr_min']))
            else:
                points_earned += 1.5
            if corr_out < outputs['outsample_corr_min']:
                incorrect = True
                msgs.append(
                    "    Out-of-sample correlation less than allowed: got {} expected {}"
                    .format(corr_out, outputs['outsample_corr_min']))
            else:
                points_earned += 1.5
            if corr_in_50 > outputs['insample_corr_max']:
                incorrect = True
                msgs.append(
                    "    In-sample correlation with leaf_size=50 greater than allowed: got {} expected {}"
                    .format(corr_in_50, outputs['insample_corr_max']))
            else:
                points_earned += 1.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("    Invalid author: {}".format(author))
                points_earned -= 1.0

        elif group is "BagLearner":
            corr1, corr20 = None, None

            def onebag():
                learner1 = BagLearner(learner=RTLearner,
                                      kwargs={"leaf_size": 1},
                                      bags=1,
                                      boost=False,
                                      verbose=False)
                learner1.addEvidence(trainX, trainY)
                return learner1.query(testX), learner1.author()

            def twentybags():
                learner20 = BagLearner(learner=RTLearner,
                                       kwargs={"leaf_size": 1},
                                       bags=20,
                                       boost=False,
                                       verbose=False)
                learner20.addEvidence(trainX, trainY)
                return learner20.query(testX)

            predY1, author = run_with_timeout(onebag,
                                              seconds_per_test_case,
                                              pos_args=(),
                                              keyword_args={})
            predY20 = run_with_timeout(twentybags, seconds_per_test_case, (),
                                       {})

            corr1 = np.corrcoef(predY1, testY)[0, 1]
            corr20 = np.corrcoef(predY20, testY)[0, 1]
            incorrect = False
            msgs = []
            if corr20 <= corr1:
                incorrect = True
                msgs.append(
                    "    Out-of-sample correlation for 20 bags is not greater than for 1 bag. 20 bags:{}, 1 bag:{}"
                    .format(corr20, corr1))
            else:
                points_earned += 2.0
            # Check author string
            if (author is None) or (author == 'tb34'):
                incorrect = True
                msgs.append("    Invalid author: {}".format(author))
                points_earned -= 1.0

        if incorrect:
            inputs_str = "    data file: {}\n" \
                         "    permutation: {}".format(datafile, permutation)
            raise IncorrectOutput, "Test failed on one or more output criteria.\n  Inputs:\n{}\n  Failures:\n{}".format(
                inputs_str, "\n".join(msgs))
    except Exception as e:
        # Test result: failed
        msg = "Description: {} (group: {})\n".format(description, group)

        # Generate a filtered stacktrace, only showing erroneous lines in student file(s)
        tb_list = tb.extract_tb(sys.exc_info()[2])
        for i in range(len(tb_list)):
            row = tb_list[i]
            # Show only the filename instead of the long absolute path
            tb_list[i] = (os.path.basename(row[0]), row[1], row[2], row[3])
        tb_list = [
            row for row in tb_list
            if (row[0] == 'RTLearner.py') or (row[0] == 'BagLearner.py')
        ]
        if tb_list:
            msg += "Traceback:\n"
            msg += ''.join(tb.format_list(tb_list))  # contains newlines
        msg += "{}: {}".format(e.__class__.__name__, e.message)

        # Report failure result to grader, with stacktrace
        grader.add_result(
            GradeResult(outcome='failed', points=points_earned, msg=msg))
        raise
    else:
        # Test result: passed (no exceptions)
        grader.add_result(
            GradeResult(outcome='passed', points=points_earned, msg=None))
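
# Note: RTLearner and BagLearner are student submissions; the tests above only pin
# down their interface (constructor keyword arguments, addEvidence, query, author).
# The sketch below matches that interface for BagLearner; it is illustrative only,
# not a reference solution, and 'your_gt_username' is a placeholder.
import numpy as np

class BagLearner(object):
    """Bootstrap-aggregating wrapper around any learner exposing the same interface."""

    def __init__(self, learner, kwargs, bags=20, boost=False, verbose=False):
        self.learners = [learner(**kwargs) for _ in range(bags)]
        self.verbose = verbose

    def author(self):
        return 'your_gt_username'  # must not be left as the template value

    def addEvidence(self, dataX, dataY):
        n = dataX.shape[0]
        for member in self.learners:
            # Train each member on a bootstrap sample drawn with replacement
            idx = np.random.randint(0, n, size=n)
            member.addEvidence(dataX[idx], dataY[idx])

    def query(self, points):
        # Average the member predictions
        return np.mean([member.query(points) for member in self.learners], axis=0)
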