Exemple #1
0
def test_tps_make_feature_vectors():
    '''Deprecated test of TracePrintSequence.feature_vectors; does nothing.

    The function returns on its first statement. The statements after the
    return are never executed and are kept only as a record of what the
    test used to exercise.
    '''
    return  # the test code is deprecated
    # test 3 OTRs and consumption of entire set of messages
    verbose_print = machine_learning.make_verbose_print(False)
    stop_in_debugger = machine_learning.make_set_trace(True)

    def build_message(spec):
        # spec is (index, cusip, reclassified trade type)
        seq, cusip, side = spec
        return message.TracePrint(
            source='test_tps_make_feature_vectors',
            identifier=str(seq),
            cusip=cusip,
            issuepriceid=str(seq),
            datetime=datetime.datetime.now(),
            oasspread=float(seq),
            trade_type=None,
            reclassified_trade_type=side,
            cancellation_probability=0.0,
        )

    specs = (
        (0, 'p', 'B'),
        (1, 'o1', 'S'),
        (2, 'o2', 'S'),
        (3, 'p', 'S'),
        (4, 'o1', 'B'),
        (5, 'p', 'S'),
        (6, 'o2', 'B'),
        (7, 'o1', 'S'),
        (8, 'o1', 'B'),
        (9, 'p', 'S'),
        (10, 'o2', 'B'),
        (11, 'p', 'B'),
        (12, 'o2', 'S'),
    )
    sequence = TracePrintSequence()
    for spec in specs:
        sequence.accumulate(build_message(spec))
    assert len(sequence._msgs) == len(specs)
    stop_in_debugger()
    for side in ('B', 'S'):
        vectors = sequence.feature_vectors(
            cusips=('p', 'o1', 'o2'),
            n_feature_vectors=2,
            required_reclassified_trade_type=side,
            trace=False,
        )
        stop_in_debugger()
        assert len(vectors) == 1
        for position, vector in enumerate(vectors):
            print(side, position, vector['id_trigger_identifier'], vector['id_target_oasspread'])
            verbose_print(vector)
Exemple #2
0
 def loop(msgs):
     '''return (feature_vectors, unused messages)

     Relies on n_feature_vectors, cusips, and set_trace, none of which are
     defined here; they must be supplied by an enclosing scope.

     For each offset i in range(n_feature_vectors), every feature creator is
     run on msgs[i:] for every cusip and the resulting features are gathered,
     under cusip-qualified keys, into a features.Features().

     Raises exception.Features when a feature creator raises
     exception.NoFeatures.

     NOTE(review): the gathered features are never appended to the returned
     list (the code that did so is commented out below), so the first element
     of the result is currently always an empty list.
     '''
     vp = machine_learning.make_verbose_print(False)
     set_trace()
     feature_creators = (
         ('trace_print', features.trace_print),
         )
     result_feature_vectors = []
     result_unused = msgs
     # NOTE(review): unconditional debugger stop; left in place, but it will
     # block any non-interactive run.
     pdb.set_trace()
     for i in range(n_feature_vectors):
         msgs_to_be_used = msgs[i:]
         all_features = features.Features()
         for creator_name, creator in feature_creators:
             for cusip in cusips:
                 try:
                     cusip_features, unused = creator(msgs_to_be_used, cusip)
                 except exception.NoFeatures as e:
                     raise exception.Features('cusip %s, %s' % (cusip, e.msg)) from e
                 # remember the shortest list of unused messages seen so far
                 if len(unused) < len(result_unused):
                     result_unused = copy.copy(unused)
                 # update feature names to incorporate the cusip
                 for k, v in cusip_features.items():
                     if k.startswith('id_'):
                         # identifier features keep the 'id_' prefix; k[3:] drops it
                         key = 'id_%s_%s' % (cusip, k[3:])
                     else:
                         key = '%s_%s_%s' % (creator_name, cusip, k)
                     all_features.add(key, v)
         continue   # bypass old code, for now
         # try:
         #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
         #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
         #         i,
         #         fv['id_trigger_identifier'],
         #         len(msgs_to_be_used),
         #         len(unused),
         #     ))
         #     if False and i % 10 == 1:
         #         pdb.set_trace()
         #     result_feature_vectors.append(fv)
         #     if len(unused) < len(result_unused):
         #         result_unused = copy.copy(unused)
         # except exception.NoPriorEventWithCusipAndRtt as e:
         #     vp('stub: handle exception %s' % e)
         #     break
         # except exception.NoMessageWithCusip as e:
         #     vp('stub: handle exception %s' % e)
         #     break
     set_trace()
     return list(reversed(result_feature_vectors)), result_unused
Exemple #3
0
def trace_print(
        msgs: typing.List[shared_message.Message],
        cusip: str,
        debug=False,
) -> typing.Tuple[FeatureVector, typing.List[shared_message.Message]]:
    '''return (Features from msgs, unused messages) or raise NoFeatures

    The features for the cusip are formed from two trace prints for each
    reclassified trade type ('B' and 'S'): the first two messages in msgs
    that have the cusip and that trade type.

    The unused messages are the ones following the second matching message;
    of the 'B' and 'S' candidates, the shorter list is returned.

    Raises exception.NoFeatures when msgs does not hold two messages for the
    cusip for either trade type.

    The caller modifies the feature vector keys to include the name of this
    function in those keys, so that the keys will be unique across all
    features. So DO NOT include the name of this function in the keys of the
    feature vector.
    '''
    def find_messages(
            msgs, cusip, reclassified_trade_type,
    ) -> typing.Tuple[typing.List[shared_message.Message], typing.List[shared_message.Message]]:
        '''attempt to find first 2 messages with specified attributes

        return (list of first 2 messages with the cusip and reclassified
        trade type, unused messages) or raise NoFeatures
        '''
        result = []
        for i, msg in enumerate(msgs):
            if msg.cusip == cusip and msg.reclassified_trade_type == reclassified_trade_type:
                result.append(msg)
                if len(result) == 2:
                    # everything after the second match was not needed
                    return result, msgs[i + 1:]
        raise exception.NoFeatures('features.trace_print: not 2 %s %s messages' % (cusip, reclassified_trade_type))

    def add_features(result: FeatureVector, rtt: str, msgs: typing.List[shared_message.Message]) -> None:
        'mutate result by adding features from 2 trace print messages'
        assert len(msgs) == 2
        msg0 = msgs[0]  # most recent message
        msg1 = msgs[1]  # message just before the most recent message
        result['id_%s_msg0_issuepriceid' % rtt] = msg0.issuepriceid
        result['id_%s_msg1_issuepriceid' % rtt] = msg1.issuepriceid
        result['%s_oasspread' % rtt] = msg0.oasspread
        result['%s_oasspread_less_prior' % rtt] = msg0.oasspread - msg1.oasspread
        # a zero prior spread would divide by zero, so the ratio is set to 100.0
        result['%s_oasspread_divided_by_prior' % rtt] = (
            100.0 if msg1.oasspread == 0.0 else msg0.oasspread / msg1.oasspread
            )

    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace()
    B_messages, B_unused = find_messages(msgs, cusip, 'B')
    S_messages, S_unused = find_messages(msgs, cusip, 'S')
    result = FeatureVector()
    add_features(result, 'B', B_messages)
    add_features(result, 'S', S_messages)
    vp('features_B_and_S')
    vpp(result)
    set_trace()
    # report the search that consumed the most messages (the shorter remainder)
    unused = B_unused if len(B_unused) < len(S_unused) else S_unused
    return result, unused
Exemple #4
0
    def test_2a():
        'trace_print on all of message set 2: 6 features per cusip, known unused counts'
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        messages = make_messages_2()
        # (cusip, extra keyword arguments, expected number of unused messages)
        expectations = (
            ('p', {'debug': False}, 0),
            ('o1', {}, 1),
            ('o2', {}, 2),
        )
        for cusip, extra, n_unused in expectations:
            features_found, leftover = trace_print(messages, cusip, **extra)
            assert len(leftover) == n_unused
            assert len_features(features_found) == 6
Exemple #5
0
 def feature_vector(msgs, cusips, required_reclassified_trade_type):
     '''return (feature_vector, unused messages)

     The feature vector is a dict. It starts with id_ fields taken from the
     first message (the trigger) and from the message that find_cusip_rtt
     returns for cusips[0] and the required trade type (the target). The
     features that cusip_features builds for each cusip are then added under
     keys marked 'primary' (cusips[0]) or 'otrN' (cusips[N]).

     The unused messages are the shortest list reported by cusip_features,
     or msgs itself if none was shorter.
     '''
     def role_of(position):
         # the first cusip is the primary; the others are numbered OTRs
         return 'primary' if position == 0 else 'otr%d' % position

     def key_with_cusip_info(k, i):
         # insert the role after the text before the first underscore
         prefix, _, remainder = k.partition('_')
         return '%s_%s_%s' % (prefix, role_of(i), remainder)

     if False and trace:
         pdb.set_trace()
     vp = machine_learning.make_verbose_print(False)
     fewest_unused = msgs
     # NOTE: these field names are used by message.FeatureVectors.__repr__()
     # Don't change them here unless you also change them there
     target, _ = find_cusip_rtt(
         msgs,
         cusips[0],
         required_reclassified_trade_type,
         )
     vp('trace_print_with_oasspread', target)
     trigger = msgs[0]
     vector = {}
     vector['id_target_oasspread'] = target.oasspread
     vector['id_target_reclassified_trade_type'] = required_reclassified_trade_type
     vector['id_trigger_source'] = trigger.source
     vector['id_trigger_identifier'] = trigger.identifier
     vector['id_trigger_reclassified_trade_type'] = trigger.reclassified_trade_type
     vector['id_trigger_event_datetime'] = trigger.datetime
     for position, cusip in enumerate(cusips):
         per_cusip, leftover = cusip_features(msgs, cusip)
         vp('cusip_features result', position, cusip, len(per_cusip), len(leftover))
         vector['id_feature_vector_%s' % role_of(position)] = cusip
         # adjust the keys to reflect whether the features are from the primary or OTR cusip
         vector.update({
             key_with_cusip_info(name, position): value
             for name, value in per_cusip.items()
         })
         if len(leftover) < len(fewest_unused):
             fewest_unused = copy.copy(leftover)
     return vector, fewest_unused
Exemple #6
0
    def test_2b():
        'trace_print on message set 2 without its first message: o2 must raise NoFeatures'
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        messages = make_messages_2()[1:]  # start at second message

        # (cusip, expected number of unused messages)
        for cusip, n_unused in (('p', 0), ('o1', 1)):
            features_found, leftover = trace_print(messages, cusip)
            assert len(leftover) == n_unused
            assert len_features(features_found) == 6

        try:
            features_found, leftover = trace_print(messages, 'o2')
        except exception.NoFeatures as e:
            vp('expected exception', e)
        else:
            assert False, 'should have raised exception'
Exemple #7
0
def test_FeatureVector():
    '''Check which item assignments FeatureVector accepts.

    test1 stores a string under an id_ key and a float under a plain key.
    test2 stores an int under a plain key and expects
    exception.FeatureVector to be raised.
    '''
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    def test1():
        set_trace()
        ok = FeatureVector()
        ok['id_trace_print'] = 'abc'
        ok['a'] = 10.0

    def test2():
        set_trace()
        bad = FeatureVector()
        try:
            bad['a'] = 1  # must be a float, but is not
        except exception.FeatureVector as e:
            vp('exception', e)
        except Exception as e:
            print('raised unexpected exception', e)
            assert False, 'should have raised exception.FeatureVector'
        else:
            # kept out of the try body so that this failure is not caught by
            # the except Exception clause and misreported as an unexpected
            # exception
            assert False, 'should have raised an exception'

    test1()
    test2()
Exemple #8
0
    def test_Train(self):
        'a Train message converted with str() and parsed by from_string keeps its fields'
        debug = False
        set_trace = machine_learning.make_set_trace(debug)
        vp = machine_learning.make_verbose_print(debug)

        set_trace()

        expected_source = 'testing'
        expected_identifier = '123'

        original = Train(
            source=expected_source,
            identifier=expected_identifier,
            feature_vectors=self.feature_vectors,
        )
        vp(original)
        round_tripped = from_string(str(original))
        vp(round_tripped)

        # both the original and the reconstructed object must be Train messages
        self.assertTrue(isinstance(original, Train))
        self.assertTrue(isinstance(round_tripped, Train))

        # the reconstructed message carries the same content
        self.assertEqual(round_tripped.source, expected_source)
        self.assertEqual(round_tripped.identifier, expected_identifier)
        vp(round_tripped.feature_vectors)
        self.assertEqual(round_tripped.feature_vectors, self.feature_vectors)
Exemple #9
0
def test_trace_print():
    '''Exercise trace_print on two hand-built message sequences.

    Message set 1 has two 'B' and two 'S' messages for cusip 'a' and a
    single message for cusip 'b'. Message set 2 mixes cusips 'p', 'o1',
    and 'o2'.
    '''
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    # ----- fixtures

    def make_messages(*tests):
        'return one TracePrint per (cusip, info, rtt) tuple, in the order given'
        def make_message(test):
            cusip, info, rtt = test
            return shared_message.TracePrint(
                source='trace_print_test',
                identifier=str(info),
                cusip=cusip,
                issuepriceid=str(info),
                datetime=datetime.datetime.now(),
                oasspread=float(info),
                trade_type=rtt,
                reclassified_trade_type=rtt,
                cancellation_probability=0.0,
            )

        return [make_message(test) for test in tests]

    def make_messages_1():
        return make_messages(
            ('a', 1, 'B'),
            ('a', 2, 'S'),
            ('a', 3, 'B'),
            ('a', 4, 'S'),
            ('b', 5, 'B'),
        )

    def make_messages_2():
        return make_messages(
            ('o2', 12, 'S'),
            ('p', 11, 'B'),
            ('o2', 10, 'B'),
            ('p', 9, 'S'),
            ('o1', 8, 'B'),
            ('o1', 7, 'S'),
            ('o2', 6, 'B'),
            ('p', 5, 'S'),
            ('o1', 4, 'B'),
            ('p', 3, 'S'),
            ('o2', 2, 'S'),
            ('o1', 1, 'S'),
            ('p', 0, 'B'),
        )

    def len_features(fv):
        'count the keys of fv that do not start with id_'
        return sum(1 for k, _ in fv.items() if not k.startswith('id_'))

    # ----- tests on message set 1

    def test_1a():
        messages = make_messages_1()
        set_trace()
        r = trace_print(messages, 'a')
        vp('test_ok', r)
        set_trace()
        try:
            r = trace_print(messages, 'b')
        except exception.NoFeatures as e:
            vp('raised', e)
            set_trace()
        else:
            assert False, 'should raise an exception'

    def test_1b():
        messages = make_messages_1()
        set_trace()
        try:
            r = trace_print(messages, 'b')
        except exception.NoFeatures as e:
            # expect to be here
            vp(e)
        else:
            assert False, 'should have raised'

    # ----- tests on message set 2

    def test_2a():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        messages = make_messages_2()
        # (cusip, extra keyword arguments, expected number of unused messages)
        expectations = (
            ('p', {'debug': False}, 0),
            ('o1', {}, 1),
            ('o2', {}, 2),
        )
        for cusip, extra, n_unused in expectations:
            features_found, leftover = trace_print(messages, cusip, **extra)
            assert len(leftover) == n_unused
            assert len_features(features_found) == 6

    def test_2b():
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)

        set_trace()
        messages = make_messages_2()[1:]  # start at second message

        # (cusip, expected number of unused messages)
        for cusip, n_unused in (('p', 0), ('o1', 1)):
            features_found, leftover = trace_print(messages, cusip)
            assert len(leftover) == n_unused
            assert len_features(features_found) == 6

        try:
            features_found, leftover = trace_print(messages, 'o2')
        except exception.NoFeatures as e:
            vp('expected exception', e)
        else:
            assert False, 'should have raised exception'

    test_1a()
    test_1b()
    test_2a()
    test_2b()
Exemple #10
0
def do_work(config, verbose=True):
    '''Read every event from the event queue and publish the matching message.

    config is read with config.get() for the keys 'out_events_base',
    'in_secmaster_path', and 'primary_cusip'. Progress is printed through a
    verbose printer when verbose is true.

    Events are handled by event.source:
    - 'trace': a TracePrint is published when the event's cusip is the
      primary cusip or an OTR cusip already seen; other cusips are ignored.
    - 'liq_flow_on_the_run': for the primary cusip, a SetPrimaryOTRs is
      published and the OTR cusip is remembered.
    - 'etl.py': an OutputStart, OutputStop, SetCusipPrimary, or SetVersion
      is published, selected by event.source_identifier.
    An unknown source or source identifier is printed and stops in pdb.

    All messages go to the same exchange with routing key
    'events.<primary cusip>'. Once the queue is exhausted the connection is
    closed and analysis() is run on a fresh event queue.
    '''
    # NOTE(review): unconditional debugger stop on entry; left in place, but
    # it will block any non-interactive run.
    pdb.set_trace()
    vp = machine_learning.make_verbose_print(verbose)
    vpp = machine_learning.make_verbose_pp(verbose)

    connection = shared_queue.PrimitiveBlockingConnection(
        path_for_input=make_path_for_input,
        paths_for_output=ExchangeRoutingPathMaker(config.get('out_events_base')).make_paths_for_output,
        )
    try:
        channel = connection.channel()

        secmaster = SecMaster(
            path=config.get('in_secmaster_path'),
            debug=False,
        )
        primary_cusip = config.get('primary_cusip')
        issuer = secmaster.get_issuer(primary_cusip)

        routing_key = 'events.%s' % primary_cusip
        exchange = 'dummy_exchange'

        def publish(message):
            # Single place that talks to the channel, so every message is
            # sent with the same exchange, routing key, and body= keyword.
            channel.publish(
                exchange=exchange,
                routing_key=routing_key,
                body=str(message),
            )

        event_queue = make_event_queue(config, issuer)
        otr_cusip = {}  # key: cusip, value: int (>= 1)
        while True:
            try:
                event = next(event_queue)
            except StopIteration:
                break  # all the event readers are empty

            # handle the event
            vp('\nnext event:', event.datetime, event.source, event.source_identifier)
            if event.source == 'trace':
                print('handle trace event')
                if event.payload['cusip'] == primary_cusip or event.payload['cusip'] in otr_cusip:
                    vp('trace print for primary or OTR cusip')
                    publish(shared_message.TracePrint(
                        source='trace_%s.csv' % issuer,
                        identifier=event.source_identifier,
                        cusip=event.payload['cusip'],
                        issuepriceid=event.source_identifier,
                        datetime=event.datetime,
                        oasspread=float(event.payload['oas']),
                        trade_type=event.payload['trade_type'],
                        reclassified_trade_type=event.payload['reclassified_trade_type'],
                        cancellation_probability=0.0,  # for now
                    ))
                else:
                    vp('trace print for neither primary nor OTR cusip')
            elif event.source == 'liq_flow_on_the_run':
                if event.payload['primary_cusip'] == primary_cusip:
                    vp('handle liq_flow event for the primary cusip')
                    publish(shared_message.SetPrimaryOTRs(
                        source='liq_flow_on_the_run_%s.csv' % issuer,
                        identifier=event.source_identifier,
                        primary_cusip=event.payload['primary_cusip'],
                        otr_cusips=(event.payload['otr_cusip'],),  # must be an iterable
                    ))
                    otr_cusip[event.payload['otr_cusip']] = 1  # for now, just 1 OTR cusip
                else:
                    vp('otr not for primary')
            elif event.source == 'etl.py':
                vp('handle etl.py event')
                # NOTE(review): the two messages below use source 'elt.py'
                # while the others use 'etl.py'; this looks like a typo but
                # is left unchanged in case a consumer matches on it.
                if event.source_identifier == 'output_start':
                    publish(shared_message.OutputStart(
                        source='elt.py',
                        identifier=str(datetime.datetime.now()),
                    ))
                elif event.source_identifier == 'output_stop':
                    publish(shared_message.OutputStop(
                        source='elt.py',
                        identifier=str(datetime.datetime.now()),
                    ))
                elif event.source_identifier == 'primary_cusip':
                    publish(shared_message.SetCusipPrimary(
                        source='etl.py',
                        identifier=str(datetime.datetime.now()),
                        cusip=event.payload['primary_cusip'],
                    ))
                elif event.source_identifier == 'set_version':
                    publish(shared_message.SetVersion(
                        source='etl.py',
                        identifier=str(datetime.datetime.now()),
                        what=event.payload['what'],
                        version=event.payload['version'],
                    ))
                else:
                    print('invalid event.source_identifier %s' % event.source_identifier)
                    pdb.set_trace()
            else:
                print(event)
                print('unknown event source')
                pdb.set_trace()
        print('processed all of the events')
    finally:
        # close the connection even when handling an event raises
        connection.close()

    analysis(config, make_event_queue(config, issuer))