Example 1
def test_tps_make_feature_vectors():
    return  # the test code is deprecated
    # test 3 OTRs and consumption of entire set of messages
    vp = machine_learning.make_verbose_print(False)
    set_trace = machine_learning.make_set_trace(True)
    
    def make_trace_print_message(tp_info):
        index, cusip, rtt = tp_info
        return message.TracePrint(
            source='test_tps_make_feature_vectors',
            identifier=str(index),
            cusip=cusip,
            issuepriceid=str(index),
            datetime=datetime.datetime.now(),
            oasspread=float(index),
            trade_type=None,
            reclassified_trade_type=rtt,
            cancellation_probability=0.0,
            )
    
    trace_prints = (
        (0, 'p', 'B'),
        (1, 'o1', 'S'),
        (2, 'o2', 'S'),
        (3, 'p', 'S'),
        (4, 'o1', 'B'),
        (5, 'p', 'S'),
        (6, 'o2', 'B'),
        (7, 'o1', 'S'),
        (8, 'o1', 'B'),
        (9, 'p', 'S'),
        (10, 'o2', 'B'),
        (11, 'p', 'B'),
        (12, 'o2', 'S'),
    )
    tps = TracePrintSequence()
    for tp_info in trace_prints:
        msg = make_trace_print_message(tp_info)
        tps.accumulate(msg)
    assert len(tps._msgs) == len(trace_prints)
    set_trace()
    for rtt in ('B', 'S'):
        feature_vectors = tps.feature_vectors(
            cusips=('p', 'o1', 'o2'),
            n_feature_vectors=2,
            required_reclassified_trade_type=rtt,
            trace=False,
        )
        set_trace()
        assert len(feature_vectors) == 1
        for i, fv in enumerate(feature_vectors):
            print(rtt, i, fv['id_trigger_identifier'], fv['id_target_oasspread'])
            vp(fv)
Example 2
def trace_print(msgs: typing.List[shared_message.Message], cusip: str, debug=False) -> FeatureVector:
    'return (Features from msgs, unused messages) or raise NoFeatures'
    # create features from a trace print and the prior trace print
    # raise exception.NoFeatures if unable to create the features
    # the features are formed from two trace prints
    # The caller modifies the feature vector keys to include the name of this function
    # in those keys, so that the keys will be unique across all features. So DO NOT
    # include the name of this function in the keys of the feature vector.
    def find_messages(msgs, cusip, reclassified_trade_type) -> typing.Tuple[list, list]:
        'return (first 2 messages with the cusip and reclassified trade type, unused messages) or raise NoFeatures'
        result = []
        for i, msg in enumerate(msgs):
            if msg.cusip == cusip and msg.reclassified_trade_type == reclassified_trade_type:
                result.append(msg)
                if len(result) == 2:
                    return result, msgs[i + 1:]
        raise exception.NoFeatures('features.trace_print: not 2 %s %s messages' % (cusip, reclassified_trade_type))

    def add_features(result: FeatureVector, rtt: str, msgs: typing.List[shared_message.Message]):
        'mutate result by adding features from 2 trace print messages'
        assert len(msgs) == 2
        msg0 = msgs[0]  # most recent message
        msg1 = msgs[1]  # message just before the most recent message
        result['id_%s_msg0_issuepriceid' % rtt] = msg0.issuepriceid
        result['id_%s_msg1_issuepriceid' % rtt] = msg1.issuepriceid
        result['%s_oasspread' % rtt] = msg0.oasspread
        result['%s_oasspread_less_prior' % rtt] = msg0.oasspread - msg1.oasspread
        result['%s_oasspread_divided_by_prior' % rtt] = (
            100.0 if msg1.oasspread == 0.0 else msg0.oasspread / msg1.oasspread
            )

    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace()
    B_messages, B_unused = find_messages(msgs, cusip, 'B')
    S_messages, S_unused = find_messages(msgs, cusip, 'S')
    result = FeatureVector()
    add_features(result, 'B', B_messages)
    add_features(result, 'S', S_messages)
    vp('features_B_and_S')
    vpp(result)
    set_trace()
    return result, B_unused if len(B_unused) < len(S_unused) else S_unused
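As the comments at the top of trace_print note, it is the caller that prefixes the returned keys so they stay unique across feature makers; do_work in Example 8 does exactly that. The snippet below is a minimal, self-contained sketch of that renaming, not project code: the sample dict and the 'trace_print'/'p' values are illustrative only.

# Illustrative sketch of the caller-side key renaming described in trace_print's comments.
# The dict stands in for a FeatureVector returned for cusip 'p'; the values are made up.
fv = {'id_B_msg0_issuepriceid': '11', 'B_oasspread': 11.0, 'S_oasspread': 9.0}
name, cusip = 'trace_print', 'p'
renamed = {}
for k, v in fv.items():
    key = (
        'id_%s_%s_%s' % (name, cusip, k[3:]) if k.startswith('id_') else
        '%s_%s_%s' % (name, cusip, k)
    )
    renamed[key] = v
# renamed now holds keys such as 'id_trace_print_p_B_msg0_issuepriceid' and 'trace_print_p_B_oasspread'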
Example 3
    def test_2a():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        msgs = make_messages_2()
        fv, unused = trace_print(msgs, 'p', debug=False)
        assert len(unused) == 0
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o2')
        assert len(unused) == 2
        assert len_features(fv) == 6
Example 4
    def test_2b():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        msgs = make_messages_2()[1:]  # start at the second message

        fv, unused = trace_print(msgs, 'p')
        assert len(unused) == 0
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6

        try:
            fv, unused = trace_print(msgs, 'o2')
            assert False, 'should have raised exception'
        except exception.NoFeatures as e:
            vp('expected exception', e)
Example 5
def test_FeatureVector():
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)
    
    def test1():
        set_trace()
        ok = FeatureVector()
        ok['id_trace_print'] = 'abc'
        ok['a'] = 10.0

    def test2():
        set_trace()
        bad = FeatureVector()
        try:
            bad['a'] = 1  # must be a float, but is not
            assert False, 'should have raised an exception'
        except exception.FeatureVector as e:
            vp('exception', e)
        except Exception as e:
            print('raised unexpected exception', e)
            assert False, 'should have raised exception.FeatureVector'
    
    test1()
    test2()
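test_FeatureVector above implies that FeatureVector accepts any value for 'id_'-prefixed keys but requires floats everywhere else, raising exception.FeatureVector otherwise. The class itself does not appear in these examples; the sketch below is a hypothetical minimum consistent with that test, not the project's implementation (FeatureVectorSketch and FeatureVectorError are made-up names).

# Hypothetical sketch only; the real FeatureVector and exception.FeatureVector live elsewhere.
class FeatureVectorError(Exception):
    pass


class FeatureVectorSketch(dict):
    def __setitem__(self, key, value):
        # 'id_' keys hold identifying information and may be of any type;
        # every other key is a model feature and must be a float
        if not key.startswith('id_') and not isinstance(value, float):
            raise FeatureVectorError('feature %s=%r is not a float' % (key, value))
        super().__setitem__(key, value)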
Example 6
    def test_Train(self):
        debug = False
        set_trace = machine_learning.make_set_trace(debug)
        vp = machine_learning.make_verbose_print(debug)

        set_trace()

        source = 'testing'
        identifier = '123'

        x = Train(
            source=source,
            identifier=identifier,
            feature_vectors=self.feature_vectors,
        )
        vp(x)
        xx = from_string(str(x))
        vp(xx)
        self.assertTrue(isinstance(x, Train))
        self.assertTrue(isinstance(xx, Train))
        self.assertEqual(xx.source, source)
        self.assertEqual(xx.identifier, identifier)
        vp(xx.feature_vectors)
        self.assertEqual(xx.feature_vectors, self.feature_vectors)
Example 7
def test_trace_print():
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)
    
    def make_messages(*tests):
        def make_message(test):
            cusip, info, rtt = test
            return shared_message.TracePrint(
                source='trace_print_test',
                identifier=str(info),
                cusip=cusip,
                issuepriceid=str(info),
                datetime=datetime.datetime.now(),
                oasspread=float(info),
                trade_type=rtt,
                reclassified_trade_type=rtt,
                cancellation_probability=0.0,
                )

        msgs = []
        for test in tests:
            msgs.append(make_message(test))
        return msgs

    def make_messages_1():
        return make_messages(
            ('a', 1, 'B'),
            ('a', 2, 'S'),
            ('a', 3, 'B'),
            ('a', 4, 'S'),
            ('b', 5, 'B'),
        )

    def test_1a():
        msgs = make_messages_1()
        set_trace()
        r = trace_print(msgs, 'a')
        vp('test_ok', r)
        set_trace()
        try:
            r = trace_print(msgs, 'b')
            assert False, 'should raise an exception'
        except exception.NoFeatures as e:
            vp('raised', e)
            set_trace()

    def test_1b():
        msgs = make_messages_1()
        set_trace()
        try:
            r = trace_print(msgs, 'b')
            assert False, 'should have raised'
        except exception.NoFeatures as e:
            vp(e)
            # expect to be here

    def make_messages_2():
        return make_messages(
            ('o2', 12, 'S'),
            ('p', 11, 'B'),
            ('o2', 10, 'B'),
            ('p', 9, 'S'),
            ('o1', 8, 'B'),
            ('o1', 7, 'S'),
            ('o2', 6, 'B'),
            ('p', 5, 'S'),
            ('o1', 4, 'B'),
            ('p', 3, 'S'),
            ('o2', 2, 'S'),
            ('o1', 1, 'S'),
            ('p', 0, 'B'),
        )
    
    def len_features(fv):
        result = 0
        for k, v in fv.items():
            if k.startswith('id_'):
                pass
            else:
                result += 1
        return result
        
    def test_2a():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        msgs = make_messages_2()
        fv, unused = trace_print(msgs, 'p', debug=False)
        assert len(unused) == 0
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o2')
        assert len(unused) == 2
        assert len_features(fv) == 6

    def test_2b():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        msgs = make_messages_2()[1:]  # start at the second message

        fv, unused = trace_print(msgs, 'p')
        assert len(unused) == 0
        assert len_features(fv) == 6

        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6

        try:
            fv, unused = trace_print(msgs, 'o2')
            assert False, 'should have raised exception'
        except exception.NoFeatures as e:
            vp('expected exception', e)
    test_1a()
    test_1b()
    test_2a()
    test_2b()
Example 8
def do_work(config):
    set_trace = machine_learning.make_set_trace(True)
    set_trace()
    n_required_feature_vectors = max(HpGrids.HpGrid5().n_trades_back_choices)
    feature_vector_identifiers = Identifier()
    n_required_feature_vectors = 300  # TODO: set based on model_specs; for now this overrides the HpGrid5-derived value above
    creating_output = False
    all_trace_prints = []
    cusips = []  # [0] = primary, [1] = otr 1, [2] = otr 2, ...

    connection = shared_queue.PrimitiveBlockingConnection(
        path_for_input=InputPathMaker(config.get('in_path_prefix')).make_path_for_input,
        paths_for_output=OutputPathMaker(config.get('out_path_prefix')).make_paths_for_output,
        )
    channel = connection.channel()
    exchange = config.get('out_exchange')
    routing_key = 'events.%s.*' % config.get('primary_cusip')  # send every message to all of the experts
    input_queue = 'events.%s' % (config.get('primary_cusip'))
    channel.publish(
        exchange=exchange,
        routing_key=routing_key,
        body=str(shared_message.SetVersion(
            source='events_cusips.py',
            identifier=str(datetime.datetime.now()),
            what='machine_learning',
            version='1.0.0.0',
        )),
        )
    while True:
        try:
            s = channel.consume(input_queue)
        except StopIteration:
            break
        msg = shared_message.from_string(s)
        print('\n%r' % msg)
        if isinstance(msg, shared_message.BackToZero):
            pdb.set_trace()
            print('todo: implement BackToZero')
        elif isinstance(msg, shared_message.SetPrimaryOTRs):
            cusips = [msg.primary_cusip]
            for otr_cusip in msg.otr_cusips:
                cusips.append(otr_cusip)
        elif isinstance(msg, shared_message.SetVersion):
            channel.publish(
                exchange=exchange,
                routing_key=routing_key,
                body=str(msg),
                )
        elif isinstance(msg, shared_message.TracePrint):
            def make_feature_vector(msgs, cusips):
                'return (FeatureVector, unused msgs) or raise NoFeatures'
                if len(cusips) == 0:
                    log_msg = (
                        'A SetPrimaryOTRs message was not received before an OutputStart message was received'
                    )
                    log.critical(log_msg)
                    assert False, log_msg
                all_features = features.FeatureVector()
                shortest_unused = msgs
                for cusip in cusips:
                    for name, maker in (('trace_print', features.trace_print),):
                        try:
                            fv, unused = maker(msgs, cusip)
                        except exception.NoFeatures as e:
                            raise e
                        if len(unused) < len(shortest_unused):
                            shortest_unused = copy.copy(unused)
                        # rename the feature to use the name
                        # that makes the features unique
                        for k, v in fv.items():
                            key = (
                                'id_%s_%s_%s' % (name, cusip, k[3:]) if k.startswith('id_') else
                                '%s_%s_%s' % (name, cusip, k)
                                )
                            all_features[key] = v
                # create a unique identifier for the feature vector
                all_features['id_feature_vector'] = feature_vector_identifiers.get_next()
                # add id info from TracePrint for the first primary cusip
                for msg in msgs:
                    if msg.cusip == cusips[0]:
                        # found first trade for the primary cusip
                        all_features['id_primary_cusip'] = msg.cusip
                        all_features['id_primary_cusip_issuepriceid'] = msg.issuepriceid
                        all_features['id_primary_cusip_oasspread'] = msg.oasspread
                        all_features['id_primary_cusip_reclassified_trade_type'] = msg.reclassified_trade_type
                        break
                return all_features, shortest_unused

            # handle corrections
            made_correction = False
            for i, old_trace_print in enumerate(all_trace_prints):
                if old_trace_print.issuepriceid == msg.issuepriceid:
                    all_trace_prints[i] = msg  # replace the previous trace print message
                    made_correction = True
                    break
            if not made_correction:
                all_trace_prints.append(msg)

            if creating_output:
                all_msgs = list(reversed(all_trace_prints))
                try:
                    last_feature_vector, shortest_unused = make_feature_vector(all_msgs, cusips)
                except exception.NoFeatures as e:
                    # info ==> things are working as expected
                    log.info('unable to create even one feature vector: %s' % str(e))
                    continue
                channel.publish(
                    exchange=exchange,
                    routing_key='%s.expert.*' % config.get('primary_cusip'),  # {cusip}.expert.{model_spec}
                    body=str(shared_message.Test(
                        source='events_cusip.py',
                        identifier=last_feature_vector['id_feature_vector'],
                        feature_vector=last_feature_vector,
                        )),
                    )

                pdb.set_trace()
                many_feature_vectors = [last_feature_vector]
                for i in range(1, len(all_msgs), 1):
                    try:
                        another_feature_vector, unused = make_feature_vector(all_msgs[i:], cusips)
                    except exception.NoFeatures as e:
                        log.info('unable to create %dth feature vector: %s' % (i, str(e)))
                        break
                    many_feature_vectors.append(another_feature_vector)
                    if len(unused) < len(shortest_unused):
                        shortest_unused = copy.copy(unused)
                pdb.set_trace()
                if len(many_feature_vectors) >= n_required_feature_vectors:
                    pdb.set_trace()
                    print('create B and S targets')
                    
                    print('reconcile targets with features')
                    channel.publish(
                        exchange=exchange,
                        routing_key='%s.expert.*' % config.get('primary_cusip'),
                        body=str(shared_message.Train(
                            source='events_cusip.py',
                            identifier=last_feature_vector['id_feature_vector'],
                            feature_vectors=many_feature_vectors,  # TODO: include both B and S targets
                        )),
                    )
                # discard TracePrint objects that will never be used
                all_trace_prints = all_trace_prints[len(shortest_unused):]
        elif isinstance(msg, shared_message.TracePrintCancel):
            pdb.set_trace()
            print('todo: implement TracePrintCancel')
        elif isinstance(msg, shared_message.OutputStart):
            creating_output = True
        elif isinstance(msg, shared_message.OutputStop):
            pdb.set_trace()
            creating_output = False
        else:
            print(msg)
            print('%r' % msg)
            print('unrecognized input message type')
            pdb.set_trace()
                
    print('have read all messages')
    pdb.set_trace()
    connection.close()
Example 9
    def feature_vectors(self,
                        cusips: typing.List[str],  # primary, otr1, otr2, ...
                        n_feature_vectors: int,
                        required_reclassified_trade_type: str,
                        trace=False,
                        ):
        'return List[feature_vectors] with length up to n_feature_vectors'
        set_trace = machine_learning.make_set_trace(trace)
        
        def find_cusip(msgs, cusip):
            'return first message with the specified cusip'
            if False and trace:
                pdb.set_trace()
            for msg in msgs:
                if msg.cusip == cusip:
                    return msg
            pdb.set_trace()
            raise exception.NoMessageWithCusip(
                cusip=cusip,
                msgs=msgs,
            )
            
        def find_cusip_rtt(msgs, cusip, rtt):
            'return first msg with cusip and remaining messages'
            if False and trace:
                pdb.set_trace()
            for i, msg in enumerate(msgs):
                if msg.cusip == cusip and msg.reclassified_trade_type == rtt:
                    return msg, msgs[i + 1:]
            pdb.set_trace()
            raise exception.NoPriorEventWithCusipAndRtt(
                cusip=cusip,
                rtt=rtt,
                msgs=msgs,
            )
        
        def cusip_features(msgs, cusip):
            'return dict, unused_msgs'
            if False and trace:
                pdb.set_trace()
            result_dict = {}
            result_unused_messages = msgs
            for rtt in ('B', 'S'):
                first_msg, first_other_messages = find_cusip_rtt(msgs, cusip, rtt)
                second_msg, second_other_messages = find_cusip_rtt(first_other_messages, cusip, rtt)
                result_dict['id_first_%s_message_source' % rtt] = first_msg.source
                result_dict['id_first_%s_message_identifier' % rtt] = first_msg.identifier
                result_dict['id_second_%s_message_source' % rtt] = second_msg.source
                result_dict['id_second_%s_message_identifier' % rtt] = second_msg.identifier
                result_dict['trace_%s_oasspread' % rtt] = first_msg.oasspread
                result_dict['trace_%s_oasspread_less_prior_%s' % (rtt, rtt)] = (
                    first_msg.oasspread - second_msg.oasspread
                    )
                result_dict['trace_%s_oasspread_divided_by_prior_%s' % (rtt, rtt)] = (
                    first_msg.oasspread / second_msg.oasspread if second_msg.oasspread != 0.0 else
                    100.0
                    )
                if len(second_other_messages) < len(result_unused_messages):
                    result_unused_messages = copy.copy(second_other_messages)
            return result_dict, result_unused_messages
            
        def feature_vector(msgs, cusips, required_reclassified_trade_type):
            'return (feature_vector, unused messages)'
            def key_with_cusip_info(k, i):
                first, *others = k.split('_')
                return '%s_%s_%s' % (
                    first,
                    'primary' if i == 0 else 'otr%d' % i,
                    k[len(first) + 1:],
                    )
            
            if False and trace:
                pdb.set_trace()
            vp = machine_learning.make_verbose_print(False)
            result_unused_messages = msgs
            # NOTE: these field names are used by message.FeatureVectors.__repr__()
            # Don't change them here unless you also change them there
            trace_print_with_oasspread, _ = find_cusip_rtt(
                msgs,
                cusips[0],
                required_reclassified_trade_type,
                )
            vp('trace_print_with_oasspread', trace_print_with_oasspread)
            result_feature_vector = {
                'id_target_oasspread': trace_print_with_oasspread.oasspread,
                'id_target_reclassified_trade_type': required_reclassified_trade_type,
                'id_trigger_source': msgs[0].source,
                'id_trigger_identifier': msgs[0].identifier,
                'id_trigger_reclassified_trade_type': msgs[0].reclassified_trade_type,
                'id_trigger_event_datetime': msgs[0].datetime,
                }
            for i, cusip in enumerate(cusips):
                cf, unused_messages = cusip_features(msgs, cusip)
                vp('cusip_features result', i, cusip, len(cf), len(unused_messages))
                result_feature_vector['id_feature_vector_%s' % ('primary' if i == 0 else 'otr%d' % i)] = cusip
                for k, v in cf.items():
                    # adjust the keys to reflect whether the features are from the primary or OTR cusip
                    result_feature_vector[key_with_cusip_info(k, i)] = v
                if len(unused_messages) < len(result_unused_messages):
                    result_unused_messages = copy.copy(unused_messages)
            return result_feature_vector, result_unused_messages
        
        def loop(msgs):
            'return (feature_vectors, unused messages)'
            vp = machine_learning.make_verbose_print(False)
            set_trace()
            feature_creators = (
                ('trace_print', features.trace_print),
                )
            result_feature_vectors = []
            result_unused = msgs
            pdb.set_trace()
            for i in range(0, n_feature_vectors, 1):
                msgs_to_be_used = msgs[i:]
                all_features = features.Features()
                for feature_creator in feature_creators:
                    for cusip in cusips:
                        try:
                            cusip_features, unused = feature_creator[1](msgs_to_be_used, cusip)
                        except exception.NoFeatures as e:
                            raise exception.Features('cusip %s, %s' % (cusip, e.msg))
                        if len(unused) < len(result_unused):
                            result_unused = copy.copy(unused)
                        # update feature names to incorporate the cusip
                        for k, v in cusip_features.items():
                            key = (
                                'id_%s_%s' % (cusip, k[3:]) if k.startswith('id_') else
                                '%s_%s_%s' % (feature_creator[0], cusip, k)
                            )
                            all_features.add(key, v)
                continue   # bypass old code, for now
                # try:
                #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
                #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
                #         i,
                #         fv['id_trigger_identifier'],
                #         len(msgs_to_be_used),
                #         len(unused),
                #     ))
                #     if False and i % 10 == 1:
                #         pdb.set_trace()
                #     result_feature_vectors.append(fv)
                #     if len(unused) < len(result_unused):
                #         result_unused = copy.copy(unused)
                # except exception.NoPriorEventWithCusipAndRtt as e:
                #     vp('stub: handle exception %s' % e)
                #     break
                # except exception.NoMessageWithCusip as e:
                #     vp('stub: handle exception %s' % e)
                #     break
            set_trace()
            return list(reversed(result_feature_vectors)), result_unused

        set_trace()
        assert n_feature_vectors >= 0
        result, unused = loop(list(reversed(self._msgs)))
        set_trace()
        self._msgs = self._msgs[len(unused):]
        if True:
            print('check that we get same results using possibly fewer messages')
            set_trace()
            # test: should get same feature vectors (the result)
            result2, unused2 = loop(list(reversed(self._msgs)))
            assert len(result) == len(result2)
            assert len(unused2) == 0
            for i, item in enumerate(result):
                item2 = result2[i]
                for k, v in item.items():
                    assert item2[k] == v
            set_trace()
        return result
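For reference, key_with_cusip_info in feature_vector above rewrites each cusip_features key to record whether it came from the primary cusip or an OTR cusip. The check below is a small self-contained worked example: the function body is copied from the method, and the sample keys mirror those built in cusip_features.

# Worked example of the key renaming performed by key_with_cusip_info above.
def key_with_cusip_info(k, i):
    first, *others = k.split('_')
    return '%s_%s_%s' % (
        first,
        'primary' if i == 0 else 'otr%d' % i,
        k[len(first) + 1:],
        )


assert key_with_cusip_info('trace_B_oasspread', 0) == 'trace_primary_B_oasspread'
assert key_with_cusip_info('trace_B_oasspread', 1) == 'trace_otr1_B_oasspread'
assert key_with_cusip_info('id_first_S_message_source', 2) == 'id_otr2_first_S_message_source'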