def test_tps_make_feature_vectors(): return # the test code is deprecated # test 3 OTRs and consumption of entire set of messages vp = machine_learning.make_verbose_print(False) set_trace = machine_learning.make_set_trace(True) def make_trace_print_message(tp_info): index, cusip, rtt = tp_info return message.TracePrint( source='test_tps_make_feature_vectors', identifier=str(index), cusip=cusip, issuepriceid=str(index), datetime=datetime.datetime.now(), oasspread=float(index), trade_type=None, reclassified_trade_type=rtt, cancellation_probability=0.0, ) trace_prints = ( (0, 'p', 'B'), (1, 'o1', 'S'), (2, 'o2', 'S'), (3, 'p', 'S'), (4, 'o1', 'B'), (5, 'p', 'S'), (6, 'o2', 'B'), (7, 'o1', 'S'), (8, 'o1', 'B'), (9, 'p', 'S'), (10, 'o2', 'B'), (11, 'p', 'B'), (12, 'o2', 'S'), ) tps = TracePrintSequence() for tp_info in trace_prints: msg = make_trace_print_message(tp_info) tps.accumulate(msg) assert len(tps._msgs) == len(trace_prints) set_trace() for rtt in ('B', 'S'): feature_vectors = tps.feature_vectors( cusips=('p', 'o1', 'o2'), n_feature_vectors=2, required_reclassified_trade_type=rtt, trace=False, ) set_trace() assert len(feature_vectors) == 1 for i, fv in enumerate(feature_vectors): print(rtt, i, fv['id_trigger_identifier'], fv['id_target_oasspread']) vp(fv)
def trace_print(msgs: typing.List[shared_message.Message], cusip: str, debug=False) -> FeatureVector:
    'return (Features from msgs, unused messages) or raise NoFeatures'
    # The features are formed from the two most recent trace prints for the
    # cusip, once per reclassified trade type ('B' and 'S').
    # The caller prefixes every key with this function's name to make keys
    # unique across all feature makers, so the keys built here deliberately
    # do NOT mention the function name.

    def first_two(candidates, wanted_cusip, wanted_rtt):
        # Return ([first 2 matching messages], remaining messages) or raise
        # NoFeatures when fewer than 2 messages match.
        found = []
        for position, candidate in enumerate(candidates):
            is_match = (candidate.cusip == wanted_cusip and
                        candidate.reclassified_trade_type == wanted_rtt)
            if is_match:
                found.append(candidate)
                if len(found) == 2:
                    return found, candidates[position + 1:]
        raise exception.NoFeatures('features.trace_print: not 2 %s %s messages' % (wanted_cusip, wanted_rtt))

    def add_features(result: FeatureVector, rtt: str, msgs: typing.List[shared_message.Message]):
        # Mutate result by adding the features derived from 2 trace prints.
        assert len(msgs) == 2
        recent, prior = msgs  # most recent message, then the one just before it
        result['id_%s_msg0_issuepriceid' % rtt] = recent.issuepriceid
        result['id_%s_msg1_issuepriceid' % rtt] = prior.issuepriceid
        result['%s_oasspread' % rtt] = recent.oasspread
        result['%s_oasspread_less_prior' % rtt] = recent.oasspread - prior.oasspread
        if prior.oasspread == 0.0:
            # sentinel value avoids dividing by zero
            result['%s_oasspread_divided_by_prior' % rtt] = 100.0
        else:
            result['%s_oasspread_divided_by_prior' % rtt] = recent.oasspread / prior.oasspread

    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace()
    B_messages, B_unused = first_two(msgs, cusip, 'B')
    S_messages, S_unused = first_two(msgs, cusip, 'S')
    result = FeatureVector()
    add_features(result, 'B', B_messages)
    add_features(result, 'S', S_messages)
    vp('features_B_and_S')
    vpp(result)
    set_trace()
    # report whichever unused list is shorter (more messages were consumed)
    return result, (B_unused if len(B_unused) < len(S_unused) else S_unused)
def test_2a():
    'trace_print must succeed for every cusip when given all messages'
    debug = False
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace = machine_learning.make_set_trace(debug)
    set_trace()
    msgs = make_messages_2()
    # each cusip leaves a different number of unused trailing messages
    for target_cusip, expected_unused in (('p', 0), ('o1', 1), ('o2', 2)):
        fv, unused = trace_print(msgs, target_cusip)
        assert len(unused) == expected_unused
        assert len_features(fv) == 6
def test_2b():
    'dropping the most recent message starves cusip o2: expect NoFeatures'
    debug = False
    vp = machine_learning.make_verbose_print(debug)
    vpp = machine_learning.make_verbose_pp(debug)
    set_trace = machine_learning.make_set_trace(debug)
    set_trace()
    msgs = make_messages_2()[1:]  # start at the second message
    # 'p' and 'o1' still have enough trades of each type
    for target_cusip, expected_unused in (('p', 0), ('o1', 1)):
        fv, unused = trace_print(msgs, target_cusip)
        assert len(unused) == expected_unused
        assert len_features(fv) == 6
    # 'o2' no longer has 2 trades of each type
    try:
        fv, unused = trace_print(msgs, 'o2')
        assert False, 'should have raised exception'
    except exception.NoFeatures as e:
        vp('expected exception', e)
def test_FeatureVector():
    'unit tests for FeatureVector value validation'
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    def test1():
        # id_-prefixed entries accept any value; others accept floats
        set_trace()
        ok = FeatureVector()
        ok['id_trace_print'] = 'abc'
        ok['a'] = 10.0

    def test2():
        # a non-float, non-id value must raise exception.FeatureVector
        set_trace()
        bad = FeatureVector()
        try:
            bad['a'] = 1  # must be a float, but is not
            assert False, 'should have raised an exception'
        except exception.FeatureVector as e:
            vp('exception', e)
        except Exception as e:
            print('raised unexpected exception', e)
            # BUG FIX: assertion message previously read 'rased'
            assert False, 'should have raised exception.FeatureVector'
    test1()
    test2()
def test_Train(self):
    'round-trip a Train message through str() and from_string()'
    debug = False
    set_trace = machine_learning.make_set_trace(debug)
    vp = machine_learning.make_verbose_print(debug)
    set_trace()
    source = 'testing'
    identifier = '123'
    original = Train(
        source=source,
        identifier=identifier,
        feature_vectors=self.feature_vectors,
    )
    vp(original)
    restored = from_string(str(original))
    vp(restored)
    # both the original and the reconstructed message must be Train instances
    self.assertTrue(isinstance(original, Train))
    self.assertTrue(isinstance(restored, Train))
    # the round trip must preserve every field
    self.assertEqual(restored.source, source)
    self.assertEqual(restored.identifier, identifier)
    vp(restored.feature_vectors)
    self.assertEqual(restored.feature_vectors, self.feature_vectors)
def test_trace_print():
    'integration tests for trace_print(): success, failure, and unused counts'
    set_trace = machine_learning.make_set_trace(False)
    vp = machine_learning.make_verbose_print(False)

    def make_messages(*tests):
        # each test is (cusip, info, rtt); info seeds every numeric field
        def make_message(test):
            cusip, info, rtt = test
            return shared_message.TracePrint(
                source='trace_print_test',
                identifier=str(info),
                cusip=cusip,
                issuepriceid=str(info),
                datetime=datetime.datetime.now(),
                oasspread=float(info),
                trade_type=rtt,
                reclassified_trade_type=rtt,
                cancellation_probability=0.0,
            )
        return [make_message(test) for test in tests]

    def make_messages_1():
        # cusip 'a' has 2 B's and 2 S's; cusip 'b' has only one trade
        return make_messages(
            ('a', 1, 'B'),
            ('a', 2, 'S'),
            ('a', 3, 'B'),
            ('a', 4, 'S'),
            ('b', 5, 'B'),
        )

    def test_1a():
        msgs = make_messages_1()
        set_trace()
        r = trace_print(msgs, 'a')  # 'a' has enough trades: must succeed
        vp('test_ok', r)
        set_trace()
        try:
            r = trace_print(msgs, 'b')  # 'b' does not: must raise
            assert False, 'should raise an exception'
        except exception.NoFeatures as e:
            vp('raised', e)
        set_trace()

    def test_1b():
        msgs = make_messages_1()
        set_trace()
        try:
            r = trace_print(msgs, 'b')
            assert False, 'should have raised'
        except exception.NoFeatures as e:
            vp(e)  # expect to be here

    def make_messages_2():
        # primary cusip 'p' plus OTR cusips 'o1' and 'o2', most recent first
        return make_messages(
            ('o2', 12, 'S'),
            ('p', 11, 'B'),
            ('o2', 10, 'B'),
            ('p', 9, 'S'),
            ('o1', 8, 'B'),
            ('o1', 7, 'S'),
            ('o2', 6, 'B'),
            ('p', 5, 'S'),
            ('o1', 4, 'B'),
            ('p', 3, 'S'),
            ('o2', 2, 'S'),
            ('o1', 1, 'S'),
            ('p', 0, 'B'),
        )

    def len_features(fv):
        # count the non-identifier entries of a feature vector
        return sum(1 for k, _ in fv.items() if not k.startswith('id_'))

    def test_2a():
        # trace_print must succeed for every cusip when given all messages
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)
        set_trace()
        msgs = make_messages_2()
        for target_cusip, expected_unused in (('p', 0), ('o1', 1), ('o2', 2)):
            fv, unused = trace_print(msgs, target_cusip)
            assert len(unused) == expected_unused
            assert len_features(fv) == 6

    def test_2b():
        # dropping the most recent message starves cusip o2
        debug = False
        vp = machine_learning.make_verbose_print(debug)
        vpp = machine_learning.make_verbose_pp(debug)
        set_trace = machine_learning.make_set_trace(debug)
        set_trace()
        msgs = make_messages_2()[1:]  # start at the second message
        for target_cusip, expected_unused in (('p', 0), ('o1', 1)):
            fv, unused = trace_print(msgs, target_cusip)
            assert len(unused) == expected_unused
            assert len_features(fv) == 6
        try:
            fv, unused = trace_print(msgs, 'o2')
            assert False, 'should have raised exception'
        except exception.NoFeatures as e:
            vp('expected exception', e)

    test_1a()
    test_1b()
    test_2a()
    test_2b()
def do_work(config):
    'consume input messages until exhausted, publishing feature vectors and training data'
    # Dispatch loop: reads serialized messages from the input queue, dispatches
    # on message type, and publishes Test/Train messages to the experts.
    set_trace = machine_learning.make_set_trace(True)
    set_trace()
    n_required_feature_vectors = max(HpGrids.HpGrid5().n_trades_back_choices)
    feature_vector_identifiers = Identifier()
    n_required_feature_vectors = 300  # TODO: set based on model_specs
    creating_output = False          # toggled by OutputStart/OutputStop messages
    all_trace_prints = []            # every TracePrint seen so far (corrections applied in place)
    cusips = []                      # [0] = primary, [1] = otr 1, [2] = otr 2, ...
    connection = shared_queue.PrimitiveBlockingConnection(
        path_for_input=InputPathMaker(config.get('in_path_prefix')).make_path_for_input,
        paths_for_output=OutputPathMaker(config.get('out_path_prefix')).make_paths_for_output,
    )
    channel = connection.channel()
    exchange = config.get('out_exchange')
    routing_key = 'events.%s.*' % config.get('primary_cusip')  # send every message to all of the experts
    input_queue = 'events.%s' % (config.get('primary_cusip'))
    # announce our version before any other output
    channel.publish(
        exchange=exchange,
        routing_key=routing_key,
        body=str(shared_message.SetVersion(
            source='events_cusips.py',
            identifier=str(datetime.datetime.now()),
            what='machine_learning',
            version='1.0.0.0',
        )),
    )
    while True:
        try:
            s = channel.consume(input_queue)
        except StopIteration:
            break  # input exhausted
        msg = shared_message.from_string(s)
        print('\n%r' % msg)
        if isinstance(msg, shared_message.BackToZero):
            pdb.set_trace()
            print('todo: implement BackToZero')
        elif isinstance(msg, shared_message.SetPrimaryOTRs):
            # primary cusip first, then the on-the-run cusips in order
            cusips = [msg.primary_cusip]
            for otr_cusip in msg.otr_cusips:
                cusips.append(otr_cusip)
        elif isinstance(msg, shared_message.SetVersion):
            # forward version messages unchanged
            channel.publish(
                exchange=exchange,
                routing_key=routing_key,
                body=str(msg),
            )
        elif isinstance(msg, shared_message.TracePrint):
            def make_feature_vector(msgs, cusips):
                'return (FeatureVector, unused msgs) or raise NoFeatures'
                if len(cusips) == 0:
                    log_msg = (
                        'A SetCusipOTRs message was not received before a StartOutput message was received'
                    )
                    log.critical(log_msg)
                    assert False, log_msg
                all_features = features.FeatureVector()
                shortest_unused = msgs
                for cusip in cusips:
                    for name, maker in (('trace_print', features.trace_print),):
                        # exception.NoFeatures propagates to the caller unchanged
                        # (was try/except-as-e/raise-e, which is a no-op wrapper)
                        fv, unused = maker(msgs, cusip)
                        if len(unused) < len(shortest_unused):
                            shortest_unused = copy.copy(unused)
                        # rename the feature to use the name
                        # that makes the features unique
                        for k, v in fv.items():
                            key = (
                                'id_%s_%s_%s' % (name, cusip, k[3:])
                                if k.startswith('id_') else
                                '%s_%s_%s' % (name, cusip, k)
                            )
                            all_features[key] = v
                # create a unique identifier for the feature vector
                all_features['id_feature_vector'] = feature_vector_identifiers.get_next()
                # add id info from TracePrint for the first primary cusip
                for msg in msgs:
                    if msg.cusip == cusips[0]:  # found first trade for the primary cusip
                        all_features['id_primary_cusip'] = msg.cusip
                        all_features['id_primary_cusip_issuepriceid'] = msg.issuepriceid
                        all_features['id_primary_cusip_oasspread'] = msg.oasspread
                        all_features['id_primary_cusip_reclassified_trade_type'] = msg.reclassified_trade_type
                        break
                return all_features, shortest_unused
            # handle corrections: a TracePrint with an already-seen issuepriceid
            # replaces the earlier message instead of being appended
            made_correction = False
            for i, old_trace_print in enumerate(all_trace_prints):
                if old_trace_print.issuepriceid == msg.issuepriceid:
                    all_trace_prints[i] = msg  # replace the previous trace print message
                    made_correction = True
                    break
            if not made_correction:
                all_trace_prints.append(msg)
            if creating_output:
                all_msgs = list(reversed(all_trace_prints))  # most recent first
                try:
                    last_feature_vector, shortest_unused = make_feature_vector(all_msgs, cusips)
                except exception.NoFeatures as e:
                    # info ==> things are working as expected
                    log.info('unable to create even one feature vector: %s' % str(e))
                    continue
                channel.publish(
                    exchange=exchange,
                    routing_key='%s.expert.*' % config.get('primary_cusip'),  # {cusip}.expert.{model_spec}
                    body=str(message.Test(
                        source='events_cusip.py',
                        identifier=last_feature_vector['id_feature_vector'],
                        feature_vector=last_feature_vector,
                    )),
                )
                pdb.set_trace()
                many_feature_vectors = [last_feature_vector]
                for i in range(1, len(all_msgs), 1):
                    try:
                        # BUG FIX: the required cusips argument was missing,
                        # which raised TypeError instead of building the vector
                        another_feature_vector, unused = make_feature_vector(all_msgs[i:], cusips)
                    except exception.NoFeatures as e:
                        log.info('unable to create %dth feature vector: %s' % (i, str(e)))
                        break
                    many_feature_vectors.append(another_feature_vector)
                    if len(unused) < len(shortest_unused):
                        # BUG FIX: was assigned to misspelled 'shorted_unused',
                        # so shortest_unused never tracked the minimum
                        shortest_unused = copy.copy(unused)
                pdb.set_trace()
                if len(many_feature_vectors) >= n_required_feature_vectors:
                    pdb.set_trace()
                    print('create B and S targets')
                    print('reconcile targets with features')
                    channel.publish(
                        exchange=exchange,
                        routing_key='%s.expert.*' % config.get('primary_cusip'),
                        body=str(message.Train(
                            source='events_cusip.py',
                            identifier=last_feature_vector['id_feature_vector'],
                            feature_vectors=many_feature_vectors,  # TODO: include both B and S targets
                        )),
                    )
                    # discard TracePrint objects that will never be used
                    # (BUG FIX: previously read the misspelled 'shorted_unused',
                    # a NameError whenever that variable was never assigned)
                    all_trace_prints = all_trace_prints[len(shortest_unused):]
        elif isinstance(msg, shared_message.TracePrintCancel):
            pdb.set_trace()
            print('todo: implement TracePrintCancel')
        elif isinstance(msg, shared_message.OutputStart):
            creating_output = True
        elif isinstance(msg, shared_message.OutputStop):
            pdb.set_trace()
            creating_output = False
        else:
            print(msg)
            print('%r' % msg)
            print('unrecognized input message type')
            pdb.set_trace()
    print('have read all messages')
    pdb.set_trace()
    connection.close()
def feature_vectors(self,
                    cusips: typing.List[str],  # primary, otr1, otr2, ...
                    n_feature_vectors: int,
                    required_reclassified_trade_type: str,
                    trace=False,
                    ):
    'return List[feature_vectors] with length up to n_feature_vectors'
    set_trace = machine_learning.make_set_trace(trace)

    def find_cusip(msgs, cusip):
        'return first message with the specified cusip'
        if False and trace:
            pdb.set_trace()
        for msg in msgs:
            if msg.cusip == cusip:
                return msg
        pdb.set_trace()
        raise exception.NoMessageWithCusip(
            cusip=cusip,
            msgs=msgs,
        )

    def find_cusip_rtt(msgs, cusip, rtt):
        'return first msg with cusip and remaining messages'
        if False and trace:
            pdb.set_trace()
        for i, msg in enumerate(msgs):
            if msg.cusip == cusip and msg.reclassified_trade_type == rtt:
                return msg, msgs[i + 1:]
        pdb.set_trace()
        raise exception.NoPriorEventWithCusipAndRtt(
            cusip=cusip,
            rtt=rtt,
            msgs=msgs,
        )

    def cusip_features(msgs, cusip):
        'return dict, unused_msgs'
        # Features for one cusip: oasspread level, difference, and ratio for
        # the two most recent B trades and the two most recent S trades.
        if False and trace:
            pdb.set_trace()
        result_dict = {}
        result_unused_messages = msgs
        for rtt in ('B', 'S'):
            first_msg, first_other_messages = find_cusip_rtt(msgs, cusip, rtt)
            second_msg, second_other_messages = find_cusip_rtt(first_other_messages, cusip, rtt)
            result_dict['id_first_%s_message_source' % rtt] = first_msg.source
            result_dict['id_first_%s_message_identifier' % rtt] = first_msg.identifier
            result_dict['id_second_%s_message_source' % rtt] = second_msg.source
            result_dict['id_second_%s_message_identifier' % rtt] = second_msg.identifier
            result_dict['trace_%s_oasspread' % rtt] = first_msg.oasspread
            result_dict['trace_%s_oasspread_less_prior_%s' % (rtt, rtt)] = (
                first_msg.oasspread - second_msg.oasspread
            )
            # sentinel of 100.0 avoids division by zero
            result_dict['trace_%s_oasspread_divided_by_prior_%s' % (rtt, rtt)] = (
                first_msg.oasspread / second_msg.oasspread if second_msg.oasspread != 0.0 else 100.0
            )
            if len(second_other_messages) < len(result_unused_messages):
                result_unused_messages = copy.copy(second_other_messages)
        return result_dict, result_unused_messages

    def feature_vector(msgs, cusips, required_reclassified_trade_type):
        'return (feature_vector, unused messages)'
        def key_with_cusip_info(k, i):
            # rewrite 'id_x' -> 'id_primary_x' / 'id_otrN_x' etc.
            first, *others = k.split('_')
            return '%s_%s_%s' % (
                first,
                'primary' if i == 0 else 'otr%d' % i,
                k[len(first) + 1:],
            )

        if False and trace:
            pdb.set_trace()
        vp = machine_learning.make_verbose_print(False)
        result_unused_messages = msgs
        # NOTE: these field names are used by message.FeatureVectors.__repr__()
        # Don't change them here unless you also change them there
        trace_print_with_oasspread, _ = find_cusip_rtt(
            msgs,
            cusips[0],
            required_reclassified_trade_type,
        )
        vp('trace_print_with_oasspread', trace_print_with_oasspread)
        result_feature_vector = {
            'id_target_oasspread': trace_print_with_oasspread.oasspread,
            'id_target_reclassified_trade_type': required_reclassified_trade_type,
            'id_trigger_source': msgs[0].source,
            'id_trigger_identifier': msgs[0].identifier,
            'id_trigger_reclassified_trade_type': msgs[0].reclassified_trade_type,
            'id_trigger_event_datetime': msgs[0].datetime,
        }
        for i, cusip in enumerate(cusips):
            cf, unused_messages = cusip_features(msgs, cusip)
            vp('cusip_features result', i, cusip, len(cf), len(unused_messages))
            result_feature_vector['id_feature_vector_%s' % ('primary' if i == 0 else 'otr%d' % i)] = cusip
            for k, v in cf.items():
                # adjust the keys to reflect whether the features are from the primary or OTR cusip
                result_feature_vector[key_with_cusip_info(k, i)] = v
            if len(unused_messages) < len(result_unused_messages):
                result_unused_messages = copy.copy(unused_messages)
        return result_feature_vector, result_unused_messages

    def loop(msgs):
        'return (feature_vectors, unused messages)'
        vp = machine_learning.make_verbose_print(False)
        set_trace()
        feature_creators = (
            ('trace_print', features.trace_print),
        )
        result_feature_vectors = []
        result_unused = msgs
        pdb.set_trace()  # NOTE(review): unconditional breakpoint left from development
        for i in range(0, n_feature_vectors, 1):
            msgs_to_be_used = msgs[i:]
            all_features = features.Features()
            for feature_creator in feature_creators:
                for cusip in cusips:
                    try:
                        # BUG FIX: local was named 'cusip_features', shadowing
                        # the helper function of the same name above
                        made_features, unused = feature_creator[1](msgs_to_be_used, cusip)
                    except exception.NoFeatures as e:
                        raise exception.Features('cusip %s, %s' % (cusip, e.msg))
                    if len(unused) < len(result_unused):
                        result_unused = copy.copy(unused)
                    # update feature names to incorporate the cusip
                    for k, v in made_features.items():
                        # BUG FIX: both branches were missing the '%' operator
                        # (attempting to call a str), and 'startwith' was a
                        # typo for 'startswith'
                        key = (
                            'id_%s_%s' % (cusip, k[3:])
                            if k.startswith('id_') else
                            '%s_%s_%s' % (feature_creator[0], cusip, k)
                        )
                        all_features.add(key, v)
            continue  # bypass old code, for now
            # try:
            #     fv, unused = feature_vector(msgs_to_be_used, cusips, required_reclassified_trade_type)
            #     vp('loop %d: fv trigger identifier: %s len(msgs): %d, len(unused): %d' % (
            #         i,
            #         fv['id_trigger_identifier'],
            #         len(msgs_to_be_used),
            #         len(unused),
            #     ))
            #     if False and i % 10 == 1:
            #         pdb.set_trace()
            #     result_feature_vectors.append(fv)
            #     if len(unused) < len(result_unused):
            #         result_unused = copy.copy(unused)
            # except exception.NoPriorEventWithCusipAndRtt as e:
            #     vp('stub: handle exception %s' % e)
            #     break
            # except exception.NoMessageWithCusip as e:
            #     vp('stub: handle exception %s' % e)
            #     break
        set_trace()
        return list(reversed(result_feature_vectors)), result_unused

    set_trace()
    assert n_feature_vectors >= 0
    # messages are stored oldest-first; the builders want most-recent-first
    result, unused = loop(list(reversed(self._msgs)))
    set_trace()
    self._msgs = self._msgs[len(unused):]  # drop messages that can never be used again
    if True:
        print('check that we get same results using possibly fewer messages')
        set_trace()
        # test: should get same feature vectors (the result)
        result2, unused2 = loop(list(reversed(self._msgs)))
        assert len(result) == len(result2)
        assert len(unused2) == 0
        for i, item in enumerate(result):
            item2 = result2[i]
            for k, v in item.items():
                assert item2[k] == v
    set_trace()
    return result