def test_tps_make_feature_vectors(): return # the test code is deprecated # test 3 OTRs and consumption of entire set of messages vp = machine_learning.make_verbose_print(False) set_trace = machine_learning.make_set_trace(True) def make_trace_print_message(tp_info): index, cusip, rtt = tp_info return message.TracePrint( source='test_tps_make_feature_vectors', identifier=str(index), cusip=cusip, issuepriceid=str(index), datetime=datetime.datetime.now(), oasspread=float(index), trade_type=None, reclassified_trade_type=rtt, cancellation_probability=0.0, ) trace_prints = ( (0, 'p', 'B'), (1, 'o1', 'S'), (2, 'o2', 'S'), (3, 'p', 'S'), (4, 'o1', 'B'), (5, 'p', 'S'), (6, 'o2', 'B'), (7, 'o1', 'S'), (8, 'o1', 'B'), (9, 'p', 'S'), (10, 'o2', 'B'), (11, 'p', 'B'), (12, 'o2', 'S'), ) tps = TracePrintSequence() for tp_info in trace_prints: msg = make_trace_print_message(tp_info) tps.accumulate(msg) assert len(tps._msgs) == len(trace_prints) set_trace() for rtt in ('B', 'S'): feature_vectors = tps.feature_vectors( cusips=('p', 'o1', 'o2'), n_feature_vectors=2, required_reclassified_trade_type=rtt, trace=False, ) set_trace() assert len(feature_vectors) == 1 for i, fv in enumerate(feature_vectors): print(rtt, i, fv['id_trigger_identifier'], fv['id_target_oasspread']) vp(fv)
def loop(msgs):
    'return (feature_vectors, unused messages)'
    # Builds up to n_feature_vectors feature vectors by sliding a window over msgs.
    # Relies on names from the enclosing scope: n_feature_vectors, cusips, set_trace.
    vp = machine_learning.make_verbose_print(False)
    set_trace()
    # (feature set name, feature creator callable) pairs
    feature_creators = (
        ('trace_print', features.trace_print),
    )
    result_feature_vectors = []
    result_unused = msgs
    for i in range(n_feature_vectors):
        msgs_to_be_used = msgs[i:]
        all_features = features.Features()
        for feature_creator in feature_creators:
            for cusip in cusips:
                try:
                    cusip_features, unused = feature_creator[1](msgs_to_be_used, cusip)
                except exception.NoFeatures as e:
                    # NOTE(review): confirm exception.Features exists; it may be
                    # intended to be exception.NoFeatures
                    raise exception.Features('cusip %s, %s' % (cusip, e.msg)) from e
                # track the shortest "unused" tail seen so far
                if len(unused) < len(result_unused):
                    result_unused = copy.copy(unused)
                # update feature names to incorporate the cusip
                for k, v in cusip_features.items():
                    # fixed: the original applied the tuples to the format strings
                    # ('fmt' (args) -> TypeError) instead of using the % operator,
                    # and called the nonexistent str.startwith
                    key = (
                        'id_%s_%s' % (cusip, k[3:]) if k.startswith('id_')
                        else '%s_%s_%s' % (feature_creator[0], cusip, k)
                    )
                    all_features.add(key, v)
        continue  # bypass old code, for now
        # try:
        #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
        #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
        #         i,
        #         fv['id_trigger_identifier'],
        #         len(msgs_to_be_used),
        #         len(unused),
        #     ))
        #     if False and i % 10 == 1:
        #         pdb.set_trace()
        #     result_feature_vectors.append(fv)
        #     if len(unused) < len(result_unused):
        #         result_unused = copy.copy(unused)
        # except exception.NoPriorEventWithCusipAndRtt as e:
        #     vp('stub: handle exception %s' % e)
        #     break
        # except exception.NoMessageWithCusip as e:
        #     vp('stub: handle exception %s' % e)
        #     break
    set_trace()
    # NOTE(review): result_feature_vectors is never appended to while the old code
    # is bypassed, so this currently returns an empty list of feature vectors
    return list(reversed(result_feature_vectors)), result_unused
def trace_print(msgs: typing.List[shared_message.Message], cusip: str, debug=False) -> FeatureVector:
    'return (Features from msgs, unused messages) or raise NoFeatures'
    # create features from a trace print and the prior trace print
    # return empty feature if not able to create features
    # the features are formed from two trace prints
    # The caller modifies the feature vector keys to include the name of this functions
    # in those keys, so that the keys will be unique across all features. So DO NOT
    # include the name of this function in the keys of the feature vector.
    def find_messages(msgs, cusip, reclassified_trade_type) -> typing.List[shared_message.Message]:
        'attempt to find first 2 messages with specified attributes'
        'return list of first 2 messages with the cusip and reclassified trade type and unused messages'
        result = []
        for i, msg in enumerate(msgs):
            if msg.cusip == cusip and msg.reclassified_trade_type == reclassified_trade_type:
                result.append(msg)
                if len(result) == 2:
                    # unused = everything after the second matching message
                    return result, msgs[i + 1:]
        # fewer than 2 matching messages were found
        raise exception.NoFeatures('features.trace_print: not 2 %s %s messages' % (cusip, reclassified_trade_type))

    def add_features(result: FeatureVector, rtt: str, msgs: typing.List[shared_message.Message]):
        # mutate result by adding features from 2 trace print messages
        assert len(msgs) == 2
        msg0 = msgs[0]  # most recent message
        msg1 = msgs[1]  # message just before the most recent message
        result['id_%s_msg0_issuepriceid' % rtt] = msg0.issuepriceid
        result['id_%s_msg1_issuepriceid' % rtt] = msg1.issuepriceid
        result['%s_oasspread' % rtt] = msg0.oasspread
        result['%s_oasspread_less_prior' % rtt] = msg0.oasspread - msg1.oasspread
        # guard against division by zero; 100.0 is the sentinel ratio in that case
        result['%s_oasspread_divided_by_prior' % rtt] = (
            100.0 if msg1.oasspread == 0.0 else
            msg0.oasspread / msg1.oasspread
        )

    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace()
    # need 2 buys and 2 sells for the cusip; either lookup may raise NoFeatures
    B_messages, B_unused = find_messages(msgs, cusip, 'B')
    S_messages, S_unused = find_messages(msgs, cusip, 'S')
    result = FeatureVector()
    add_features(result, 'B', B_messages)
    add_features(result, 'S', S_messages)
    vp('features_B_and_S')
    vpp(result)
    set_trace()
    # return the shorter of the two unused tails (the one consuming more messages)
    return result, B_unused if len(B_unused) < len(S_unused) else S_unused
def test_2a():
    'exercise trace_print on the full message set; expect 6 non-id features per cusip'
    # NOTE(review): this module-level copy duplicates the test_2a nested inside
    # test_trace_print and depends on make_messages_2/len_features being in scope
    debug = False
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace = machine_learning.make_set_trace(debug)
    set_trace()
    msgs = make_messages_2()
    fv, unused = trace_print(msgs, 'p', debug=False)
    assert len(unused) == 0
    assert len_features(fv) == 6
    # the OTR cusips leave progressively longer unused tails
    for test_cusip, expected_unused in (('o1', 1), ('o2', 2)):
        fv, unused = trace_print(msgs, test_cusip)
        assert len(unused) == expected_unused
        assert len_features(fv) == 6
def feature_vector(msgs, cusips, required_reclassified_trade_type):
    'return (feature_vector, unused messages)'
    # cusips[0] is the primary cusip; the rest are OTR cusips 1..n
    def key_with_cusip_info(k, i):
        # insert 'primary' or 'otrN' after the first underscore-delimited token of k
        first, *_ = k.split('_')  # fixed: unused unpack target renamed to _
        return '%s_%s_%s' % (
            first,
            'primary' if i == 0 else 'otr%d' % i,
            k[len(first) + 1:],
        )

    # fixed: removed dead guard `if False and trace: pdb.set_trace()`, which
    # referenced the undefined name `trace` (never evaluated only because of the
    # short-circuiting False)
    vp = machine_learning.make_verbose_print(False)
    result_unused_messages = msgs
    # NOTE: these field names are used by message.FeatureVectors.__repr__()
    # Don't change them here unless you also change them there
    trace_print_with_oasspread, _ = find_cusip_rtt(
        msgs,
        cusips[0],
        required_reclassified_trade_type,
    )
    vp('trace_print_with_oasspread', trace_print_with_oasspread)
    result_feature_vector = {
        'id_target_oasspread': trace_print_with_oasspread.oasspread,
        'id_target_reclassified_trade_type': required_reclassified_trade_type,
        'id_trigger_source': msgs[0].source,
        'id_trigger_identifier': msgs[0].identifier,
        'id_trigger_reclassified_trade_type': msgs[0].reclassified_trade_type,
        'id_trigger_event_datetime': msgs[0].datetime,
    }
    for i, cusip in enumerate(cusips):
        cf, unused_messages = cusip_features(msgs, cusip)
        vp('cusip_features result', i, cusip, len(cf), len(unused_messages))
        result_feature_vector['id_feature_vector_%s' % ('primary' if i == 0 else 'otr%d' % i)] = cusip
        for k, v in cf.items():
            # adjust the keys to reflect whether the features are from the primary or OTR cusip
            result_feature_vector[key_with_cusip_info(k, i)] = v
        # keep the shortest unused tail across all cusips
        if len(unused_messages) < len(result_unused_messages):
            result_unused_messages = copy.copy(unused_messages)
    return result_feature_vector, result_unused_messages
def test_2b():
    'exercise trace_print with the first message dropped; o2 now lacks 2 S messages'
    # NOTE(review): this module-level copy duplicates the test_2b nested inside
    # test_trace_print and depends on make_messages_2/len_features being in scope
    debug = False
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace = machine_learning.make_set_trace(debug)
    set_trace()
    msgs = make_messages_2()[1:]  # start at the second message
    for test_cusip, expected_unused in (('p', 0), ('o1', 1)):
        fv, unused = trace_print(msgs, test_cusip)
        assert len(unused) == expected_unused
        assert len_features(fv) == 6
    try:
        fv, unused = trace_print(msgs, 'o2')
        assert False, 'should have raised exception'
    except exception.NoFeatures as e:
        vp('expected exception', e)
def test_FeatureVector():
    'unit-test FeatureVector: id_ keys accept strings, other keys must be floats'
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    def test1():
        # valid assignments: an id_ string and a float feature
        set_trace()
        ok = FeatureVector()
        ok['id_trace_print'] = 'abc'
        ok['a'] = 10.0

    def test2():
        # a non-float feature value must raise exception.FeatureVector
        set_trace()
        bad = FeatureVector()
        try:
            bad['a'] = 1  # must be a float, but is not
            assert False, 'should have raised an exception'
        except exception.FeatureVector as e:
            vp('exception', e)
        except Exception as e:
            print('raised unexpected exception', e)
            # fixed: typo 'rased' -> 'raised' in the assertion message
            assert False, 'should have raised exception.FeatureVector'

    test1()
    test2()
def test_Train(self):
    'round-trip a Train message through str() and from_string()'
    debug = False
    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    set_trace()
    expected_source = 'testing'
    expected_identifier = '123'
    original = Train(
        source=expected_source,
        identifier=expected_identifier,
        feature_vectors=self.feature_vectors,
    )
    vp(original)
    # serialize, then parse back into a message object
    round_tripped = from_string(str(original))
    vp(round_tripped)
    self.assertTrue(isinstance(original, Train))
    self.assertTrue(isinstance(round_tripped, Train))
    self.assertEqual(round_tripped.source, expected_source)
    self.assertEqual(round_tripped.identifier, expected_identifier)
    vp(round_tripped.feature_vectors)
    self.assertEqual(round_tripped.feature_vectors, self.feature_vectors)
def test_trace_print():
    'unit-test features.trace_print over several message layouts'
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    def make_messages(*tests):
        # build TracePrint messages from (cusip, info, reclassified trade type) triples;
        # info doubles as identifier, issuepriceid, and oasspread
        def make_message(test):
            cusip, info, rtt = test
            return shared_message.TracePrint(
                source='trace_print_test',
                identifier=str(info),
                cusip=cusip,
                issuepriceid=str(info),
                datetime=datetime.datetime.now(),
                oasspread=float(info),
                trade_type=rtt,
                reclassified_trade_type=rtt,
                cancellation_probability=0.0,
            )
        msgs = []
        for test in tests:
            msgs.append(make_message(test))
        return msgs

    def make_messages_1():
        # cusip 'a' has 2 B and 2 S messages; cusip 'b' has only 1 B message
        return make_messages(
            ('a', 1, 'B'),
            ('a', 2, 'S'),
            ('a', 3, 'B'),
            ('a', 4, 'S'),
            ('b', 5, 'B'),
        )

    def test_1a():
        # 'a' has enough messages; 'b' must raise NoFeatures
        msgs = make_messages_1()
        set_trace()
        r = trace_print(msgs, 'a')
        vp('test_ok', r)
        set_trace()
        try:
            r = trace_print(msgs, 'b')
            assert False, 'should raise an exception'
        except exception.NoFeatures as e:
            vp('raised', e)
            set_trace()

    def test_1b():
        # 'b' has only one message, so NoFeatures is expected
        msgs = make_messages_1()
        set_trace()
        try:
            r = trace_print(msgs, 'b')
            assert False, 'should have raised'
        except exception.NoFeatures as e:
            vp(e)  # expect to be here

    def make_messages_2():
        # every cusip ('p', 'o1', 'o2') has at least 2 B and 2 S messages
        return make_messages(
            ('o2', 12, 'S'),
            ('p', 11, 'B'),
            ('o2', 10, 'B'),
            ('p', 9, 'S'),
            ('o1', 8, 'B'),
            ('o1', 7, 'S'),
            ('o2', 6, 'B'),
            ('p', 5, 'S'),
            ('o1', 4, 'B'),
            ('p', 3, 'S'),
            ('o2', 2, 'S'),
            ('o1', 1, 'S'),
            ('p', 0, 'B'),
        )

    def len_features(fv):
        # count the non-identifier entries of a feature vector
        result = 0
        for k, v in fv.items():
            if k.startswith('id_'):
                pass
            else:
                result += 1
        return result

    def test_2a():
        # full message set: all three cusips produce 6 features each
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)
        set_trace()
        msgs = make_messages_2()
        fv, unused = trace_print(msgs, 'p', debug=False)
        assert len(unused) == 0
        assert len_features(fv) == 6
        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6
        fv, unused = trace_print(msgs, 'o2')
        assert len(unused) == 2
        assert len_features(fv) == 6

    def test_2b():
        # drop the first message ('o2' S): 'o2' no longer has 2 S messages
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)
        set_trace()
        msgs = make_messages_2()[1:]  # start at send message
        fv, unused = trace_print(msgs, 'p')
        assert len(unused) == 0
        assert len_features(fv) == 6
        fv, unused = trace_print(msgs, 'o1')
        assert len(unused) == 1
        assert len_features(fv) == 6
        try:
            fv, unused = trace_print(msgs, 'o2')
            assert False, 'should have raised exception'
        except exception.NoFeatures as e:
            vp('expected exception', e)

    test_1a()
    test_1b()
    test_2a()
    test_2b()
def do_work(config, verbose=True):
    'pump events from the event queue, republish them as shared messages, then run analysis'
    # fixed: removed the unconditional pdb.set_trace() debug artifact at function
    # entry, which halted every (including non-interactive) run
    vp = machine_learning.make_verbose_print(verbose)
    vpp = machine_learning.make_verbose_pp(verbose)
    connection = shared_queue.PrimitiveBlockingConnection(
        path_for_input=make_path_for_input,
        paths_for_output=ExchangeRoutingPathMaker(config.get('out_events_base')).make_paths_for_output,
    )
    channel = connection.channel()
    secmaster = SecMaster(
        path=config.get('in_secmaster_path'),
        debug=False,
    )
    issuer = secmaster.get_issuer(config.get('primary_cusip'))
    primary_cusip = config.get('primary_cusip')
    routing_key = 'events.%s' % primary_cusip
    exchange = 'dummy_exchange'
    event_queue = make_event_queue(config, issuer)
    otr_cusip = {}  # key: cusip, value: int (>= 1)
    while True:
        try:
            event = next(event_queue)
        except StopIteration:
            break  # all the event readers are empty
        # handle the event
        vp('\nnext event:', event.datetime, event.source, event.source_identifier)
        if event.source == 'trace':
            print('handle trace event')
            if event.payload['cusip'] == primary_cusip or event.payload['cusip'] in otr_cusip:
                vp('trace print for primary or OTR cusip')
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    body=str(shared_message.TracePrint(
                        source='trace_%s.csv' % issuer,
                        identifier=event.source_identifier,
                        cusip=event.payload['cusip'],
                        issuepriceid=event.source_identifier,
                        datetime=event.datetime,
                        oasspread=float(event.payload['oas']),
                        trade_type=event.payload['trade_type'],
                        reclassified_trade_type=event.payload['reclassified_trade_type'],
                        cancellation_probability=0.0,  # for now
                    )),
                )
            else:
                vp('trace print for neither primary nor OTR cusip')
        elif event.source == 'liq_flow_on_the_run':
            if event.payload['primary_cusip'] == primary_cusip:
                vp('handle liq_flow event for the primary cusip')
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    body=str(shared_message.SetPrimaryOTRs(
                        source='liq_flow_on_the_run_%s.csv' % issuer,
                        identifier=event.source_identifier,
                        primary_cusip=event.payload['primary_cusip'],
                        otr_cusips=(event.payload['otr_cusip'],),  # must be an iterable
                    )),
                )
                otr_cusip[event.payload['otr_cusip']] = 1  # for now, just 1 OTR cusip
            else:
                vp('otr not for primary')
        elif event.source == 'etl.py':
            vp('handle etl.py event')
            if event.source_identifier == 'output_start':
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    body=str(shared_message.OutputStart(
                        source='etl.py',  # fixed: was the typo 'elt.py'
                        identifier=str(datetime.datetime.now()),
                    )),
                )
            elif event.source_identifier == 'output_stop':
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    body=str(shared_message.OutputStop(
                        source='etl.py',  # fixed: was the typo 'elt.py'
                        identifier=str(datetime.datetime.now()),
                    )),
                )
            elif event.source_identifier == 'primary_cusip':
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    # fixed: keyword was `message=`, inconsistent with every other
                    # publish() call, which uses `body=`
                    body=str(shared_message.SetCusipPrimary(
                        source='etl.py',
                        identifier=str(datetime.datetime.now()),
                        cusip=event.payload['primary_cusip'],
                    )),
                )
            elif event.source_identifier == 'set_version':
                channel.publish(
                    exchange=exchange,
                    routing_key=routing_key,
                    body=str(shared_message.SetVersion(
                        source='etl.py',
                        identifier=str(datetime.datetime.now()),
                        what=event.payload['what'],
                        version=event.payload['version'],
                    )),
                )
            else:
                print('invalid event.source_identifier %s' % event.source_identifier)
                pdb.set_trace()  # deliberate stop on unexpected input
        else:
            print(event)
            print('unknown event source')
            pdb.set_trace()  # deliberate stop on unexpected input
    print('processed all of the events')
    connection.close()
    analysis(config, make_event_queue(config, issuer))