Ejemplo n.º 1
0
 def __init__(self):
     """
     Load every model file found in the model directory.

     The directory is ``main_path()`` with its last ``/``-separated
     component removed, followed by ``/model/``. Each file name is split
     on ``.``: the first piece becomes the key, the second the sub-key,
     and the stored value is ``[loaded_model, third_piece, last_piece]``.
     """
     self.model = MyDict()  # key is question id, value is model
     self.model_path = main_path().rsplit('/', 1)[0] + '/model/'
     file_names = self.eachFile(self.model_path)
     print('Model loading ...')
     for file_name in file_names:
         # A file name looks like '2017_01_qm_11_1.svm.cf.m'
         print('  ', file_name)
         parts = file_name.split('.')
         # The last piece of the name selects the loader.
         loaded = self.load_model(self.model_path + file_name, parts[-1])
         self.model.add(key=parts[0],
                        sub_key=parts[1],
                        value=[loaded, parts[2], parts[-1]])
     print('Model loading completed ...')
Ejemplo n.º 2
0
class TestStatesCombiner(TestCase):
    """Tests for StatesCombiner built from the state lists [A, B] x [C, D]."""

    # Shared fixture: two state dimensions with two values each, which the
    # assertions below expect to combine into four named states.
    params = MyDict()
    params.log = sys.stdout
    params.states_list = [['A', 'B'], ['C', 'D']]
    states = StatesCombiner(params)

    def test_combine(self):
        """The combiner exposes four states and both lookup directions."""
        self.assertIsInstance(self.states, StatesCombiner)
        self.assertEqual(self.states.max_id, 4)
        self.assertEqual(len(self.states.state), 4)

        # name -> id
        self.assertEqual(self.states.state['A_C'], 0)
        self.assertEqual(self.states.state['A_D'], 1)
        self.assertEqual(self.states.state['B_C'], 2)
        self.assertEqual(self.states.state['B_D'], 3)

        # id -> name
        self.assertEqual(self.states.ivd[0], 'A_C')
        self.assertEqual(self.states.ivd[1], 'A_D')
        self.assertEqual(self.states.ivd[2], 'B_C')
        self.assertEqual(self.states.ivd[3], 'B_D')

    def test_get_id(self):
        """get_id maps a tuple of sub-states to the combined state id."""
        self.assertEqual(self.states.get_id('A', 'C'), 0)
        self.assertEqual(self.states.get_id('A', 'D'), 1)
        self.assertEqual(self.states.get_id('B', 'C'), 2)
        self.assertEqual(self.states.get_id('B', 'D'), 3)

        # assertRaises fails the test when nothing is raised; the previous
        # try/except form passed silently in that case.
        with self.assertRaises(AssertionError):
            self.states.get_id('A', 'B')

    def test_name(self):
        """name maps a combined state id back to its string name."""
        self.assertEqual(self.states.name(0), 'A_C')
        self.assertEqual(self.states.name(1), 'A_D')
        self.assertEqual(self.states.name(2), 'B_C')
        self.assertEqual(self.states.name(3), 'B_D')

        # An id that is not one of the four tested above must be rejected.
        with self.assertRaises(AssertionError):
            self.states.name(5)
Ejemplo n.º 3
0
class TestCSEncoder(TestCase):
    """Tests for CSEncoder using the sample ticks and expected encodings."""

    # Shared parameters handed to every CSEncoder built by these tests.
    params = MyDict()
    params.log = Logger(4)
    # params.log = MyDict()
    # params.log.debug = do_nothing
    # params.log.info = do_nothing
    params.input_file = 'DAX100.csv'
    params.subtypes = ['body', 'move']
    params.csv_dict = {
        'd': 'Date',
        'o': 'Open',
        'h': 'High',
        'l': 'Low',
        'c': 'Close'
    }
    params.cse_tags = ['b', 'o', 'h', 'l', 'c']

    @classmethod
    def setUpClass(cls):
        """
        Load the sample ticks once and store them as a class variable,
        so they are not rebuilt for each test.
        """
        super(TestCSEncoder, cls).setUpClass()
        cls.data = sample_ticks()

    def test_CSEncoder(self):
        """
        Test the constructor. Normally called without any tick in the
        arguments, it must leave every price attribute at zero and every
        encoded delta at 'pA'.
        """
        cse = CSEncoder(self.params)
        self.assertEqual(cse.open, 0.)
        self.assertEqual(cse.close, 0.)
        self.assertEqual(cse.high, 0.)
        self.assertEqual(cse.low, 0.)
        self.assertEqual(cse.min, 0.)
        self.assertEqual(cse.max, 0.)

        # Initialization
        self.assertEqual(cse.encoded_delta_close, 'pA')
        self.assertEqual(cse.encoded_delta_high, 'pA')
        self.assertEqual(cse.encoded_delta_low, 'pA')
        self.assertEqual(cse.encoded_delta_max, 'pA')
        self.assertEqual(cse.encoded_delta_min, 'pA')
        self.assertEqual(cse.encoded_delta_open, 'pA')

    def test_correct_encoding(self):
        """
        Test if this method is correctly capturing that column names reflect
        what the encoding is saying.
        """
        self.assertTrue(
            CSEncoder(self.params, encoding='ohlc')._correct_encoding())
        self.assertTrue(
            CSEncoder(self.params, encoding='OHLC')._correct_encoding())
        self.assertFalse(
            CSEncoder(self.params, encoding='0hlc')._correct_encoding())
        self.assertFalse(
            CSEncoder(self.params, encoding='ohl')._correct_encoding())
        self.assertFalse(
            CSEncoder(self.params, encoding='')._correct_encoding())

    def test_fit(self):
        """
        Fit the encoder on the sample data and check the reference ("zero")
        OHLC values it records, the fitted flag, and that one one-hot
        encoder exists per configured subtype.
        """
        cs = CSEncoder(self.params).fit(self.data)
        self.assertEqual(cs.cse_zero_open, 50.)
        self.assertEqual(cs.cse_zero_high, 100.)
        self.assertEqual(cs.cse_zero_low, 0.)
        self.assertEqual(cs.cse_zero_close, 50.5)
        self.assertTrue(cs.fitted)
        # Check that there are two encoders and none of them is missing
        self.assertEqual(len(cs.onehot), 2)
        for subtype in self.params.subtypes:
            self.assertIsNotNone(cs.onehot[subtype])

    def test_add_ohencoder(self):
        """ Check that a onehot encoder is created for every subtype """
        cs = CSEncoder(self.params).fit(self.data)
        # Check types
        for subtype in self.params.subtypes:
            self.assertIsInstance(cs.onehot[subtype], OHEncoder)

    def test_calc_parameters(self):
        """
        Test if the parameters computed for a sample tick are correct.
        """
        # Check with the first tick. (50, 100, 0, 50.5)
        cse = CSEncoder(self.params, self.data.iloc[0])
        self.assertTrue(cse.positive)
        self.assertFalse(cse.negative)

        # Percentiles, etc...
        self.assertLessEqual(cse.body_relative_size, 0.05)
        self.assertEqual(cse.hl_interval_width, 100.)
        self.assertEqual(cse.oc_interval_width, 0.5)
        self.assertEqual(cse.mid_body_point, 50.25)
        self.assertEqual(cse.mid_body_percentile, 0.5025)
        self.assertEqual(cse.min_percentile, 0.5)
        self.assertEqual(cse.max_percentile, 0.505)
        self.assertEqual(cse.upper_shadow_len, 49.5)
        self.assertEqual(cse.upper_shadow_percentile, 0.495)
        self.assertEqual(cse.lower_shadow_len, 50.)
        self.assertEqual(cse.lower_shadow_percentile, 0.5)
        self.assertEqual(cse.body_relative_size, 0.005)
        self.assertAlmostEqual(cse.shadows_relative_diff, 0.005)

        # Body position.
        self.assertTrue(cse.shadows_symmetric)
        self.assertTrue(cse.body_in_center)
        self.assertFalse(cse.body_in_lower_half)
        self.assertFalse(cse.body_in_upper_half)
        self.assertTrue(cse.has_both_shadows)
        self.assertTrue(cse.has_lower_shadow)
        self.assertTrue(cse.has_upper_shadow)

        # Check with the second tick. (80, 100, 0, 70)
        cse = CSEncoder(self.params, self.data.iloc[1])
        self.assertIsNot(cse.positive, cse.negative)
        self.assertFalse(cse.positive)
        self.assertTrue(cse.negative)

        # Percentiles, etc...
        self.assertLessEqual(cse.body_relative_size, 0.1, 'Body relative size')
        self.assertEqual(cse.hl_interval_width, 100.)
        self.assertEqual(cse.oc_interval_width, 10.)
        self.assertEqual(cse.mid_body_point, 75.)
        self.assertEqual(cse.mid_body_percentile, 0.75)
        self.assertEqual(cse.min_percentile, 0.7)
        self.assertEqual(cse.max_percentile, 0.8)
        self.assertEqual(cse.upper_shadow_len, 20.)
        self.assertEqual(cse.upper_shadow_percentile, 0.2)
        self.assertEqual(cse.lower_shadow_len, 70.)
        self.assertEqual(cse.lower_shadow_percentile, 0.7)
        self.assertEqual(cse.body_relative_size, 0.1)
        self.assertAlmostEqual(cse.shadows_relative_diff, 0.5)

        # Body position.
        self.assertFalse(cse.body_in_center)
        self.assertFalse(cse.body_in_lower_half)
        self.assertTrue(cse.body_in_upper_half)
        self.assertTrue(cse.has_both_shadows)
        self.assertTrue(cse.has_lower_shadow)
        self.assertTrue(cse.has_upper_shadow)
        self.assertFalse(cse.shadows_symmetric)

    def test_encode_with(self):
        """
        Ensure correct encoding with sample data in class and robust
        type checking.
        """
        # Start checking that the first one is correctly encoded.
        cs = CSEncoder(self.params, self.data.iloc[0])
        with self.assertRaises(AssertionError):
            cs._encode_with('123')
        self.assertEqual(cs._encode_with('ABCDE'), 'A')
        # Try with the third one
        cs = CSEncoder(self.params, self.data.iloc[2])
        self.assertEqual(cs._encode_with('KLMNO'), 'M')

    def test__encode_body(self):
        """
        Ensure _encode_body returns the second character of the expected
        'body' tag for every sample tick.
        """
        tags = encoded_tags()
        for i in range(self.data.shape[0]):
            self.assertEqual(
                CSEncoder(self.params, self.data.iloc[i])._encode_body(),
                tags.iloc[i]['body'][1])

    def test_encode_body(self):
        """
        Ensure encode_body returns the full expected 'body' tag for every
        sample tick.
        """
        tags = encoded_tags()
        for i in range(self.data.shape[0]):
            cs = CSEncoder(self.params, self.data.iloc[i])
            self.assertEqual(cs.encode_body(), tags.iloc[i]['body'])

    def test_transform(self):
        """
        Test the method in charge of transforming an entire list of
        ticks into CSE format. Its only goal is to return an array with
        all of them.
        """
        cse = CSEncoder(self.params).fit_transform(self.data)
        self.assertEqual(len(cse), 6)
        for i in range(len(cse)):
            self.assertIsInstance(cse[i], CSEncoder)

    def test_inverse_transform(self):
        """
        Test that we can reverse a transformation to the original values.
        Only the open ('o') column is compared.
        """
        encoder = CSEncoder(self.params)
        cse = encoder.fit_transform(self.data)
        # Inverse transform needs a dataframe as input, and the first CS.
        df = cs_to_df(cse, self.params.cse_tags)
        inv_cse = encoder.inverse_transform(df, cse[0])
        for i in range(inv_cse.shape[0]):
            self.assertEqual(inv_cse.iloc[i]['o'], self.data.iloc[i]['o'])

    def test_encode_tick(self):
        """
        This one checks if the CSEncoder is built and encoded, given
        that a tick is passed together with its previous one. If the previous
        one is None, then movement is not encoded.
        -------------------------------------------
        0   10  20  30  40  50  60  70  80  90  100
        A   B   C   D   E   F   G   H   I   J   K
        -------------------------------------------
        """
        # Start with the first one, which has no previous tick
        encoder = CSEncoder(self.params).fit(self.data)
        tags = encoded_tags()
        deltas = encoded_deltas()

        previous_row = None
        for i, row in self.data.iterrows():
            cs = encoder._encode_tick(row, previous_row)
            self.assertIsInstance(cs, CSEncoder)
            self.assertEqual(cs.encoded_delta_open, tags.at[i, 'delta_open'])
            self.assertEqual(cs.encoded_delta_high, tags.at[i, 'delta_high'])
            self.assertEqual(cs.encoded_delta_low, tags.at[i, 'delta_low'])
            self.assertEqual(cs.encoded_delta_close, tags.at[i, 'delta_close'])
            self.assertAlmostEqual(cs.delta_open, deltas.at[i, 'delta_open'])
            self.assertAlmostEqual(cs.delta_high, deltas.at[i, 'delta_high'])
            self.assertAlmostEqual(cs.delta_low, deltas.at[i, 'delta_low'])
            self.assertAlmostEqual(cs.delta_close, deltas.at[i, 'delta_close'])
            # The encoded candlestick (not the raw row) is what gets passed
            # as "previous" on the next iteration.
            previous_row = cs

    def test_encode_movement(self):
        """
        Given a candlestick (CSEncoder) object, compute how its movement is
        with respect to its previous one, which is passed as argument.
        Every tick from the second onwards is checked against its
        predecessor.
        -------------------------------------------
        0   10  20  30  40  50  60  70  80  90  100
        A   B   C   D   E   F   G   H   I   J   K
        -------------------------------------------
        """
        tags = encoded_tags()
        deltas = encoded_deltas()
        prev_cs = CSEncoder(self.params, self.data.iloc[0])
        for i in range(1, self.data.shape[0]):
            cs = CSEncoder(self.params, self.data.iloc[i])
            cs.encode_movement(prev_cs)
            self.assertAlmostEqual(cs.delta_open, deltas.iloc[i].delta_open)
            self.assertAlmostEqual(cs.delta_high, deltas.iloc[i].delta_high)
            self.assertAlmostEqual(cs.delta_low, deltas.iloc[i].delta_low)
            self.assertAlmostEqual(cs.delta_close, deltas.iloc[i].delta_close)
            self.assertEqual(cs.encoded_delta_open, tags.iloc[i].delta_open)
            self.assertEqual(cs.encoded_delta_high, tags.iloc[i].delta_high)
            self.assertEqual(cs.encoded_delta_low, tags.iloc[i].delta_low)
            self.assertEqual(cs.encoded_delta_close, tags.iloc[i].delta_close)
            prev_cs = cs

    def test_recursive_encode_movement(self):
        """
        A value is searched within a range of discrete values (buckets).
        Once found, the corresponding substring at the position of the
        bucket is returned.
        """
        encoder = CSEncoder(self.params).fit(self.data)
        # Check calls with default dictionaries
        self.assertEqual(encoder._encode_movement(value=0.0), 'A')
        self.assertEqual(encoder._encode_movement(value=0.1), 'B')
        self.assertEqual(encoder._encode_movement(value=0.2), 'C')
        self.assertEqual(encoder._encode_movement(value=0.3), 'D')
        self.assertEqual(encoder._encode_movement(value=0.4), 'E')
        self.assertEqual(encoder._encode_movement(value=0.5), 'F')
        self.assertEqual(encoder._encode_movement(value=0.6), 'G')
        self.assertEqual(encoder._encode_movement(value=0.7), 'H')
        self.assertEqual(encoder._encode_movement(value=0.8), 'I')
        self.assertEqual(encoder._encode_movement(value=0.9), 'J')
        self.assertEqual(encoder._encode_movement(value=1.1), 'K')

    def test_decode_cse(self):
        """
        Check that a tick is decoded correctly, given the previous one.
        """
        col_names = list(self.params.csv_dict.keys())
        if 'd' in col_names:
            col_names.remove('d')
        encoder = CSEncoder(self.params).fit(self.data)
        cs = encoder.transform(self.data)

        # Check every tick
        for i in range(self.data.shape[0]):
            cs_df = encoded_cs_to_df(cs[i], self.params.cse_tags)
            # The first tick has no predecessor, so it is decoded against
            # itself.
            prev_cs = cs[0] if i == 0 else cs[i - 1]
            # Define tolerance as 10% of the min-max range when reconstructing
            tol = prev_cs.hl_interval_width * 0.1
            # Decode the CS, and check
            tick = encoder._decode_cse(cs_df, prev_cs, col_names)
            self.assertLessEqual(abs(tick[0] - self.data.iloc[i]['o']), tol)
            self.assertLessEqual(abs(tick[1] - self.data.iloc[i]['h']), tol)
            self.assertLessEqual(abs(tick[2] - self.data.iloc[i]['l']), tol)
            self.assertLessEqual(abs(tick[3] - self.data.iloc[i]['c']), tol)
Ejemplo n.º 4
0
class MyHandler:
    """Answer judgment requests using models pre-loaded from disk."""

    def __init__(self):
        """
        Load every model file found in the model directory.

        The directory is ``main_path()`` with its last ``/``-separated
        component removed, followed by ``/model/``. Each file name is split
        on ``.``: the first piece becomes the key, the second the sub-key,
        and the stored value is ``[loaded_model, third_piece, last_piece]``.
        """
        self.model = MyDict()  # key is question id, value is model
        self.model_path = main_path().rsplit('/', 1)[0] + '/model/'
        model_name = self.eachFile(self.model_path)
        print('Model loading ...')
        for ele in model_name:
            # ele looks like '2017_01_qm_11_1.svm.cf.m'
            print('  ', ele)
            ele_list = ele.split('.')
            # The last piece of the file name selects the loader.
            question_model = self.load_model(self.model_path + ele,
                                             ele_list[-1])
            self.model.add(key=ele_list[0],
                           sub_key=ele_list[1],
                           value=[question_model, ele_list[2], ele_list[-1]])
        print('Model loading completed ...')

    def judgment(self, question_id, question_content):
        """
        Compute the judgment for one answer; this is what the client calls.

        :param question_id: ID of question in paper
        :param question_content: answer of question ID
        :return: a string. For an unknown ``question_id`` it is
            ``question_id + question_content``; otherwise ``str()`` of the
            model result.
        """
        # Note: the returned type must be the predefined type.
        # The unknown-id check runs first: the feature generators below look
        # up per-question models, which cannot exist for an unknown id.
        if question_id not in self.model.keys():
            return question_id + question_content

        # Generate the tf-idf feature. The content is wrapped in a literal
        # list because list() cannot be used to convert it to a list.
        tfidf_X = tf_idf_generator(
            data=[question_content],
            model=self.model.get(question_id, 'tfidf')[0])

        order_X = order_fea_generator(data=[question_content],
                                      model=self.model.get(
                                          question_id, 'order')[0],
                                      feature_len=78)

        if question_id == '2017_01_qm_luwang':
            # Ensemble: sum the outputs of the three models and take the
            # index of the largest total.
            rf = self.model.get(question_id, 'rf')[0]
            logreg = self.model.get(question_id, 'logreg')[0]
            lstm = self.model.get(question_id, 'lstm')[0]
            res = rf.predict_proba(tfidf_X) + logreg.predict_proba(
                tfidf_X) + lstm.predict(order_X)
            res = np.argmax(res)
        else:
            svm = self.model.get(question_id, 'svm')[0]
            # NOTE(review): the stored model is invoked as a callable here,
            # unlike the .predict/.predict_proba calls above. Confirm this
            # is intended.
            res = svm(tfidf_X)
        print(res)
        return str(res)

    def load_model(self, model, model_type):
        """
        Load the model stored at path ``model``.

        :param model: path of the model file to be loaded
        :param model_type: the type of model; one of 'm', 'h5', 'pkl'
        :return: the loaded model, or None for any other ``model_type``
        """
        if model_type == 'm':
            return joblib.load(model)
        if model_type == 'h5':
            # Inside a method body the bare name refers to the module-level
            # load_model, not to this method.
            return load_model(model)
        if model_type == 'pkl':
            # SECURITY: pickle executes arbitrary code while loading; only
            # use this on trusted model files.
            # The with-block closes the file even if unpickling fails.
            with open(model, 'rb') as f:
                return pickle.load(f)
        return None

    def eachFile(self, file_path):
        """
        List the entries of directory ``file_path``, skipping '.DS_Store'.

        :param file_path: str type, path of the directory to traverse
        :return: list of entry names (names only, no directory part)
        """
        # os.listdir already yields bare names, so no path joining or
        # splitting is needed. That also makes the result independent of
        # whether file_path ends with a slash.
        return [name for name in os.listdir(file_path)
                if name != '.DS_Store']
Ejemplo n.º 5
0
class TestOHEncoder(TestCase):
    """Tests for OHEncoder fitted on the 26 upper-case letters."""

    # Shared parameters; logging calls are routed to a no-op.
    params = MyDict()
    params.log = MyDict()
    params.log.debug = do_nothing
    params.log.info = do_nothing
    params.input_file = 'DAX100.csv'
    params.subtypes = ['body', 'move']

    @classmethod
    def setUpClass(cls):
        """
        Build the fixtures once and store them as class variables, so
        they are not rebuilt for each test.
        """
        super(TestOHEncoder, cls).setUpClass()
        cls.categories = np.array(list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'))
        cls.width = cls.categories.shape[0]
        cls.data = sample_ticks()

    def test_reset(self):
        """Check if this function is properly resetting internal dicts"""
        oh = OHEncoder(self.params).fit(self.categories).reset()
        self.assertFalse(oh.states)
        self.assertFalse(oh.dictionary)
        self.assertFalse(oh.inv_dict)

    def test_fit(self):
        """It builds three dictionaries. Check that they're correctly built"""
        oh = OHEncoder(self.params).fit(self.categories)
        self.assertEqual(len(oh.states), len(self.categories))
        self.assertEqual(len(oh.dictionary), len(self.categories))
        self.assertEqual(len(oh.inv_dict), len(self.categories))
        self.assertEqual(len(oh.dictionary.keys()), len(self.categories))
        self.assertEqual(len(oh.inv_dict.keys()), len(self.categories))
        # 2D arrays missing from this test

    def test_encode(self):
        """
        Encodes an array of strings (signed or unsigned). If they are signed
        each string starts with character 'p' (positive) or 'n' (negative).
        One-hot encoder uses the dictionary passed during object creation
        to determine the length of the binary representation.
        """
        # One-dimensional, signed case
        oh = OHEncoder(self.params).fit(self.categories)
        bodies = pd.DataFrame({0: ['pB', 'nC']})
        result = oh.encode(bodies)
        self.assertEqual(result.sum(axis=1).iloc[0], +1.)
        self.assertEqual(result.sum(axis=1).iloc[1], -1.)
        self.assertEqual(result.iloc[0, 0], 0.)
        self.assertEqual(result.iloc[0, 1], 1.)
        self.assertEqual(result.iloc[1, 2], -1.)
        self.assertEqual(result.iloc[0, 3], 0.)

        # One-dimensional, unsigned case
        oh = OHEncoder(self.params, signed=False).fit(self.categories)
        bodies = pd.DataFrame({0: ['B', 'C']})
        result = oh.encode(bodies)
        self.assertEqual(result.sum(axis=1).iloc[0], +1.)
        self.assertEqual(result.sum(axis=1).iloc[1], +1.)
        self.assertEqual(result.iloc[0, 0], 0.)
        self.assertEqual(result.iloc[0, 1], 1.)
        self.assertEqual(result.iloc[1, 2], 1.)
        self.assertEqual(result.iloc[1, 3], 0.)

        # Bi-dimensional, signed case.
        # Encode pA & pB in a row of 26x2 bits
        # Encode pY & nZ in a row of 26x2 bits
        oh = OHEncoder(self.params).fit(self.categories)
        bodies = pd.DataFrame({0: ['pA', 'pY'], 1: ['pB', 'nZ']})
        result = oh.encode(bodies)
        self.assertEqual(result.sum(axis=1).iloc[0], +2.)
        # +1 for pY and -1 for nZ cancel out
        self.assertEqual(result.sum(axis=1).iloc[1], 0.)
        self.assertEqual(result.iloc[0, 0], 1.)
        self.assertEqual(result.iloc[0, 1], 0.)
        self.assertEqual(result.iloc[0, self.width], 0.)
        self.assertEqual(result.iloc[0, self.width + 1], 1.)
        self.assertEqual(result.iloc[1, self.width - 2], 1.)
        self.assertEqual(result.iloc[1, self.width - 1], 0.)
        self.assertEqual(result.iloc[1, self.width * 2 - 1], -1.)
        self.assertEqual(result.iloc[1, self.width * 2 - 2], 0.)

        # Bi-dimensional, unsigned case.
        # Encode A & B in a row of 26x2 bits
        # Encode Y & Z in a row of 26x2 bits
        oh = OHEncoder(self.params, signed=False).fit(self.categories)
        bodies = pd.DataFrame({0: ['A', 'Y'], 1: ['B', 'Z']})
        result = oh.encode(bodies)
        self.assertEqual(result.sum(axis=1).iloc[0], +2.)
        self.assertEqual(result.sum(axis=1).iloc[1], 2.)
        self.assertEqual(result.iloc[0, 0], 1.)
        self.assertEqual(result.iloc[0, 1], 0.)
        self.assertEqual(result.iloc[0, self.width], 0.)
        self.assertEqual(result.iloc[0, self.width + 1], 1.)
        self.assertEqual(result.iloc[1, self.width - 2], 1.)
        self.assertEqual(result.iloc[1, self.width - 1], 0.)
        self.assertEqual(result.iloc[1, self.width * 2 - 1], 1.)
        self.assertEqual(result.iloc[1, self.width * 2 - 2], 0.)

    def test_decode(self):
        """Check that decoding results in elements of the dictionary."""
        # Single array of 26 positions with a 1 in first position, must be the
        # first letter
        oh = OHEncoder(self.params).fit(self.categories)
        data = np.zeros(self.width)
        data[0] = 1.
        result = oh.decode(data)
        self.assertEqual(result, ['pA'])

        # Two arrays must be two strings. Try with 'A' and 'Z'.
        oh = OHEncoder(self.params).fit(self.categories)
        data = np.zeros((2, self.width))
        data[0][0] = 1
        data[1][self.width - 1] = 1
        result = oh.decode(data)
        self.assertListEqual(list(result), ['pA', 'pZ'])

        # Unsigned cases
        oh = OHEncoder(self.params, signed=False).fit(self.categories)
        data = np.zeros(self.width)
        data[0] = 1.
        result = oh.decode(data)
        self.assertEqual(result, ['A'])

        # Two arrays must be two strings. Try with 'A' and 'Z'.
        oh = OHEncoder(self.params, signed=False).fit(self.categories)
        data = np.zeros((2, self.width))
        data[0][0] = 1
        data[1][self.width - 1] = 1
        result = oh.decode(data)
        self.assertListEqual(list(result), ['A', 'Z'])
Ejemplo n.º 6
0
    def __init__(self, default_params_filename='params.yaml', **kwargs):
        """
        Build the configuration from a YAML parameters file.

        The file is looked up relative to the current working directory and
        its contents are merged into this object through ``add_dict``.
        Derived attributes (action/id maps, state lists and counts, the
        display helper and the results table) are then added.

        :param default_params_filename: name of the YAML file to read
        :param kwargs: forwarded unchanged to the parent constructor
        :raises AssertionError: if the loaded configuration defines no
            actions or no states
        """
        super().__init__(**kwargs)
        params = {}
        cwd = Path(getcwd())
        params_path: str = str(cwd.joinpath(default_params_filename))

        with open(params_path, 'r') as stream:
            try:
                # safe_load returns None for an empty document; fall back
                # to an empty mapping so add_dict always gets a dict.
                params = safe_load(stream) or {}
            except YAMLError as exc:
                # Best effort: report the parse error and continue with
                # empty parameters.
                print(exc)

        self.add_dict(self, params)

        # Check that I've states and actions to start playing with.
        if not self._action or not self._state:
            raise AssertionError(
                'No states or actions defined in config file.')

        # Build a dictionary with a sequential number associated to each
        # action, and the reverse dictionary (number -> action).
        self._action_id = MyDict()
        self._action_name = MyDict()
        for index, action in enumerate(self._action):
            self._action_id[action] = index
            self._action_name[index] = action

        # Specific attributes to store number of actions and states.
        self._num_actions = len(self._action)

        # Build a list of lists with the names of all possible states.
        # NOTE(review): only keys starting with '_' are collected here,
        # while the state count below iterates over every key; confirm
        # whether the two are meant to agree.
        self._states_list = [
            self._state[state]._names
            for state in self._state.keys()
            if state[0] == '_'
        ]

        # Compute the total number of states as the multiplication of the
        # number of substates in each possible state-stack
        num_states = 1
        for state in self._state.keys():
            num_states = num_states * len(self._state[state]._names)
        self._num_states = num_states

        # Create a display property to centralize all reporting activity into
        # a single function. That way I can store it all in a single dataframe
        # for later analysis.
        self.display = Display(self)

        # Create a DataFrame within the configuration to store all the values
        # that are relevant to later perform data analysis.
        # The YAML file contains the column names in a parameter called
        # table_headers.
        self.results = DataFrame(columns=self._table_headers)
Ejemplo n.º 7
0
    def __init__(self, default_params_filename='params.yaml', *args, **kwargs):
        """
        Build the configuration from the parameters file and the parsed
        command-line arguments.

        The parameters file is the one given by the ``config_file``
        argument when present, otherwise ``default_params_filename``.
        Values from the arguments are then stored as attributes, followed
        by derived attributes (action/id maps, state lists and counts, and
        the display helper).

        :param default_params_filename: parameters file used when no
            config file is given in the arguments
        :param args: positional values handed to ``Arguments``
        :param kwargs: keyword values handed to ``Arguments`` and to the
            parent constructor
        :raises AssertionError: if no actions or no states are defined
        :raises ValueError: if the action is not 'train' and no model file
            was given, or if the action is 'predict' and no portfolio name
            is available
        """
        # Extend the dictionary with the values passed in arguments.
        # Call the Dictionary constructor once the parameters file is set.
        arguments = Arguments(args, kwargs)
        if arguments.args.config_file is not None:
            parameters_file = arguments.args.config_file[0]
        else:
            parameters_file = default_params_filename
        super().__init__(parameters_file, **kwargs)

        # Check that I've states and actions to start playing with.
        if not (self.action and self.state):
            raise AssertionError(
                'No states or actions defined in config file.')

        # Override other potential parameters specified in command line.
        self.debug = arguments.args.debug is not None
        self.log_level = arguments.args.debug[0] if arguments.args.debug else 3

        # Start the logger
        if 'log_level' not in self:
            self.log_level = 3  # default value = INFO
        self.log = Logger(self.log_level)

        self.log.info(
            'Using configuration parameters from: {}'.format(parameters_file))

        # Define if acting in BULL or BEAR mode
        if arguments.args.trading_mode is None:
            self.mode = 'bull'
        else:
            self.mode = arguments.args.trading_mode[0]
        self.log.info('Trading in {} mode'.format(self.mode))

        # Define what to do
        self.possible_actions = arguments.possible_actions
        self.what_to_do = arguments.args.action
        self.log.info('{} mode'.format(self.what_to_do))

        # Guarded like the other optional arguments: a missing forecast
        # argument yields None instead of failing on the subscript.
        forecast = arguments.args.forecast
        self.forecast_file = forecast[0] if forecast else None

        # Load the NN model file, only if the action is not "train"
        if arguments.args.action != 'train' and arguments.args.model:
            self.model_file = arguments.args.model[0]
        elif arguments.args.action != 'train':
            self.log.error('Model file must be specified with -m argument')
            raise ValueError('Model file must be specified with -m argument')

        self.no_dump = arguments.args.no_dump
        self.do_plot = arguments.args.plot
        self.save_model = arguments.args.save
        self.totals = arguments.args.totals
        self.short = arguments.args.short
        if arguments.args.epochs is not None:
            self.num_episodes = int(arguments.args.epochs)
        else:
            self.num_episodes = 1

        # Init portfolio
        if arguments.args.init_portfolio is not None:
            self.init_portfolio = True
            self.portfolio_name = arguments.args.init_portfolio[0]
        else:
            self.init_portfolio = False
        # Use portfolio (when both are given, this name takes precedence)
        if arguments.args.portfolio is not None:
            self.use_portfolio = True
            self.portfolio_name = arguments.args.portfolio[0]
        else:
            self.use_portfolio = False

        # Check that if I want to predict, portfolio needs to be specified
        if self.what_to_do == 'predict' and 'portfolio_name' not in self:
            self.log.error(
                'When calling `predict`, provide a portfolio filename.')
            self.log.error(
                'To generate a portfolio, `simulate` with `--init-portfolio`')
            raise ValueError('wrong parameters')

        # Output filename specified
        if arguments.args.output is not None:
            self.output = arguments.args.output[0]
        else:
            self.output = None

        #
        # Extensions to the dictionary
        #

        # Build a dictionary with a sequential number associated to each
        # action, and the reverse dictionary (number -> action).
        self.action_id = MyDict()
        self.action_name = MyDict()
        for index, action in enumerate(self.action):
            self.action_id[action] = index
            self.action_name[index] = action

        # Specific attributes to store number of actions and states.
        self.num_actions = len(self.action)

        # Build a list of lists with the names of all possible states.
        self.states_list = [
            self.state[state].names for state in self.state.keys()
        ]

        # Compute the total number of states as the multiplication of the
        # number of substates in each possible state-stack
        num_states = 1
        for state in self.state.keys():
            num_states = num_states * len(self.state[state].names)
        self.num_states = num_states
        self.log.debug('{} possible states'.format(self.num_states))

        # Create a display property to centralize all reporting activity into
        # a single function. That way I can store it all in a single dataframe
        # for later analysis.
        self.display: Display = Display(self)