class WatchApp():

    def __init__(self):
        Log.d('construct')
        self.db = DatabaseGateway()

    def watch_continuously(self, watch_interval_seconds):
        Log.i('continuous watching activated with interval of {} seconds', watch_interval_seconds)
        consecutive_error_count = 0
        while True:
            try:
                self.__verify_datafetch_apis_write_frequency()
                consecutive_error_count = 0
            except Exception as e:
                consecutive_error_count += 1
                Log.e('fail during watcher check ({} consecutive errors)', consecutive_error_count)
                stacktrace = OsExpert.stacktrace()
                Log.d('stacktrace:\n{}', stacktrace)
            time.sleep(watch_interval_seconds)

    def __verify_datafetch_apis_write_frequency(self):
        Log.d('watcher check initiating')
        datafetch_apis_frame = self.db.datafetch_api_view_frame()
        if datafetch_apis_frame.empty:
            Log.d('no datafetch apis to watch')
        else:
            exceed_count = 0
            for i, row in datafetch_apis_frame.iterrows():
                datafetch_api_id = row['id']
                write_idle_seconds = row['write_idle_seconds']
                result_frequency_seconds = row['result_frequency_seconds']
                if write_idle_seconds > result_frequency_seconds:
                    exceed_count += 1
                    idle_time_str = Timespan.from_seconds(write_idle_seconds).as_string()
                    warn_message = 'datafetch api id {} has exceeded its {} second limit (idle time {})'.format(
                        datafetch_api_id, result_frequency_seconds, idle_time_str)
                    Log.w(warn_message)
            Log.d('watch check done, exceed count: {}', exceed_count)
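# A minimal usage sketch (not part of the original module): how WatchApp might be driven
# from an entry point. The 300-second interval is an illustrative assumption, not a value
# taken from this codebase.
if __name__ == '__main__':
    watch_app = WatchApp()
    # Check the datafetch APIs every 5 minutes; adjust to the deployment's needs.
    watch_app.watch_continuously(watch_interval_seconds=300)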
class GeneratorApp(App):

    def __init__(self, version):
        super().__init__(__file__)
        self.window_size = 15
        self.interval_seconds = [15 * 60]  # 15 minutes
        self.contruct_time = time.time()
        self.version = version
        self.sleep_seconds = 1  # must be low enough that the result set eventually comes up empty, i.e. we catch up to realtime
        self.transaction_min_timestamp = int(
            AppConfig.setting('GENERATOR_TRANSACTION_MIN_TIMESTAMP'))
        self.data_dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
        Log.d('construct: {}', self.__dict__)
        self.db = DatabaseGateway()
        max_history_minutes = 10 * 24 * 60  # max(self.minute_intervals)
        self.from_currency_ids = []
        self.to_currency_ids = []
        self.run_config = self.read_run_config()
        self.jobs = list(
            self.__jobs_iterate(max_history_minutes, self.run_config))
        Log.i('count of generator jobs: {}', len(self.jobs))

    def read_run_config(self):
        with open('generator_config.py', 'r') as config_file:
            return eval(config_file.read())

    def agent_prefix(self, agent_type, agent_params):
        # Build a column prefix such as 'feature_NAME(param-value|param-value)_'
        param_tokens = [
            '{}-{}'.format(name.lower().replace('_', ''),
                           str(value).replace('0.', '0'))
            for name, value in agent_params.items()
        ]
        return 'feature_{}({})_'.format(
            agent_type.NAME,  # ''.join(type_tokens),
            '|'.join(param_tokens))

    def explode_args(self, args_dicts):  # TODO: convert to yield
        # Expand each dict of {param: [values]} into the cartesian product of its values,
        # returning a flat list of concrete parameter dicts.
        if args_dicts is None or len(args_dicts) == 0:
            return [{}]
        total_results = []
        for args_dict in args_dicts:
            results = []
            for key, values in args_dict.items():
                new_results = []
                for value in values:
                    for dict_result in (results if len(results) > 0 else [{}]):
                        new_results.append({**dict_result, key: value})
                results = new_results
            total_results += results
        return total_results

    def __jobs_iterate(self, frame_history_minutes, run_config):
        datasources = self.db.datasources()
        exchanges = self.db.exchanges()
        currencies = self.db.currencies()
        real_currencies = [c for c in currencies if c.is_crypto == False]
        crypto_currencies = [c for c in currencies if c.is_crypto == True]
        agents = [
            CandlestickFeature, PassthroughFeature,
            MovingAverageCrossOverAgent, ExpMovingAverageCrossOverAgent,
            StochasticOscillatorAgent, VolumeAgent, HodrickPrescottFeature,
            RetroSpecAgent
        ]
        assert all(
            agent.KEYS.count(key) == 1
            for agent in agents
            for key in agent.KEYS)  # ensure agents have no duplicate keys
        for datasource in [
                d for d in datasources
                if d.name in run_config['datasource_name']]:
            for exchange in [
                    e for e in exchanges
                    if e.name in run_config['exchange_name']]:
                for from_currency in [
                        cc for cc in crypto_currencies
                        if cc.code in run_config['from_currency_code']]:  # , 'ETH', 'XRP']]:
                    self.from_currency_ids.append(from_currency.id)
                    for to_currency in [
                            rc for rc in real_currencies
                            if rc.code in run_config['to_currency_code']]:
                        self.to_currency_ids.append(to_currency.id)
                        for interval_second in self.interval_seconds:
                            for agent_name in run_config['agent_config'].keys():
                                assert len([
                                    a for a in agents if a.__name__ == agent_name
                                ]) == 1, 'not exactly one entry for agent name {}'.format(agent_name)
                            yield GeneratorJob(
                                datasource=datasource,
                                exchange=exchange,
                                from_currency=from_currency,
                                to_currency=to_currency,
                                interval_second=interval_second,  # [5*60, 15*60, 30*60, 3600, 3600*12]
                                features=[
                                    self.create_agent(agent_type, exploded_agent_params)
                                    for agent_type in agents
                                    for config_agent_name, config_agent_params in run_config['agent_config'].items()
                                    for exploded_agent_params in self.explode_args(config_agent_params)
                                    if agent_type.__name__ == config_agent_name
                                ],
                                uid='{}_{}_{}_{}_{}'.format(
                                    datasource.name, exchange.name,
                                    from_currency.code, to_currency.code,
                                    interval_second))

    def create_agent(self, agent_type, args):
        Log.d('creating agent {} with args: {}', agent_type.__name__, args)
        return agent_type(col_prefix=self.agent_prefix(agent_type, args), **args)

    def feed_jobs_forever(self, job_changed_handler):
        assert job_changed_handler is not None
        sleep_seconds = self.sleep_seconds
        transaction_min_timestamp = self.transaction_min_timestamp
        start_transaction_min_timestamp = transaction_min_timestamp
        data_dirpath = self.data_dirpath
        start_time = time.time()
        Log.i(
            'processing transactions, sleep interval {}s, starting from epoch {} ({})',
            sleep_seconds, transaction_min_timestamp,
            StringExpert.format_timestamp(transaction_min_timestamp))
        to_fetch_count = self.db.transaction_count(transaction_min_timestamp)
        Log.d('transaction count since {} ({}): {}', transaction_min_timestamp,
              StringExpert.format_timestamp(transaction_min_timestamp),
              to_fetch_count)
        pd.set_option('io.hdf.default_format', 'table')
        hdf5_filename = '{}_{}_{}.h5'.format(
            self.version.major, self.version.minor,
            datetime.fromtimestamp(start_time).strftime('%Y%m%d_%H%M%S'))
        hdf5_filepath = path.join(data_dirpath, hdf5_filename)
        Log.i('hdf5 output filepath is: \n{}', hdf5_filepath)
        set_size = 1000
        fetch_count = 0
        plot_time = time.time()
        is_realtime = False
        while True:
            try:
                next_transaction_min_timestamp = self.process_transaction_subset(
                    transaction_min_timestamp, set_size, hdf5_filepath,
                    job_changed_handler, is_realtime)
                if next_transaction_min_timestamp is None:
                    Log.d('nothing to process, waiting..')
                    is_realtime = True  # TODO: empty polling perhaps not the best indicator of switch to realtime
                    time.sleep(sleep_seconds)
                else:
                    assert next_transaction_min_timestamp > transaction_min_timestamp, 'next minimum timestamp was not greater than the current timestamp'
                    transaction_min_timestamp = next_transaction_min_timestamp
                    fetch_count += set_size
                    percentage = 100 * fetch_count / to_fetch_count
                    current_time = time.time()
                    Log.d(
                        'processed {}/{}, {}%, spent {} on the period {} ({}) to {} ({})',
                        fetch_count, to_fetch_count, int(percentage),
                        Timespan.from_seconds(int(current_time - start_time)).as_string(),
                        StringExpert.format_timestamp(start_transaction_min_timestamp),
                        start_transaction_min_timestamp,
                        StringExpert.format_timestamp(transaction_min_timestamp),
                        transaction_min_timestamp)
            except Exception as e:
                raise Exception('Failed to process nonparsed api responses') from e
        Log.w('all {} rows read, but should loop forever', fetch_count)  # unreachable: the loop above only exits by raising

    def process_transaction_subset(self, transaction_min_timestamp, set_size,
                                   hdf5_filepath, job_changed_handler, is_realtime):
        assert job_changed_handler is not None, 'no job_changed_handler provided'
        window_size = 10
        subset_process_start_time = time.time()
        frame = self.db.transaction_by_timestamp_frame(
            transaction_min_timestamp, set_size, self.from_currency_ids,
            self.to_currency_ids)
        frame.set_index('epoch_time', inplace=True)
        row_count = frame.shape[0]
        Log.d('...time spent fetching subset ({} rows) from db: {:.2f}s',
              row_count, time.time() - subset_process_start_time)
        if row_count == 0:
            return None
        row_process_count = 0
        last_epoch_time = None
        Log.d('...processing rows...')
        row_process_start_time = time.time()
        gap_resolver = self.run_config['gap_resolver']
        for epoch_time, row in frame.iterrows():
            is_row_processed = False
            try:
                transaction_id = row['id']
                datasource_id = row['datasource_id']
                exchange_id = row['exchange_id']
                from_currency_id = row['from_currency_id']
                to_currency_id = row['to_currency_id']
                price = np.float64(row['price'])
                volume = np.float64(row['volume'])
                transaction_min_timestamp = epoch_time  # transaction_id + 1
                seconds_since_previous = 0 if last_epoch_time is None else epoch_time - last_epoch_time
                Log.t('seconds since previous epoch time: {}', seconds_since_previous)
                if last_epoch_time is not None:
                    assert epoch_time >= last_epoch_time, 'epoch time ({}) was less than the previous epoch time ({})'.format(
                        epoch_time, last_epoch_time)
                assert seconds_since_previous >= 0, 'seconds_since_previous cannot be a negative value'
                last_epoch_time = epoch_time
                for job in self.jobs:
                    if (job.datasource.id == datasource_id
                            and job.exchange.id == exchange_id
                            and job.from_currency.id == from_currency_id
                            and job.to_currency.id == to_currency_id):
                        is_row_processed = True
                        try:
                            h5frame = job.frame
                            if h5frame is not None:
                                # perform integrity check on the existing (non-empty) dataframe
                                assert not h5frame.empty  # should not be possible if the frame has previously been created
                                last_epoch = h5frame.index.values[-1]
                                seconds_since_previous = epoch_time - last_epoch
                                assert seconds_since_previous >= 0
                                max_gap_seconds = 120  # TODO: make config setting
                                if seconds_since_previous > max_gap_seconds:
                                    warn_message = 'excessive time (+{}s) passed since previous observation: {}s ({}) between {} ({}) and {} ({})'.format(
                                        max_gap_seconds, seconds_since_previous,
                                        Timespan.from_seconds(int(seconds_since_previous)).as_string(),
                                        last_epoch,
                                        StringExpert.format_timestamp(last_epoch),
                                        epoch_time,
                                        StringExpert.format_timestamp(epoch_time))
                                    if gap_resolver is None:
                                        raise Exception(warn_message)
                                    Log.w(warn_message)
                                    prev_observation = h5frame.iloc[-1]
                                    df_intermediates = gap_resolver.intermediates_frame(
                                        max_gap_seconds,
                                        from_epoch=last_epoch,
                                        to_epoch=epoch_time,
                                        from_price=prev_observation['latest'],
                                        to_price=price,
                                        from_volume=prev_observation['volume'],
                                        to_volume=volume)
                                    Log.d('simulating intermediate observations:\n{}', df_intermediates)
                                    simulated_count = 0
                                    for intermediate_epoch, intermediate in df_intermediates.iterrows():
                                        job_observation = job.job_observe(
                                            value=intermediate['price'],
                                            epoch_time=intermediate_epoch,
                                            volume=intermediate['volume'],
                                            is_simulated=True,
                                            is_realtime=False)
                                        assert job_observation is not None
                                        simulated_count += 1
                                        if simulated_count % 1000 == 0:
                                            Log.d('..simulated {}/{}..', simulated_count, len(df_intermediates))
                                    Log.i('done simulating {} observations up until epoch {} ({})',
                                          len(df_intermediates), epoch_time,
                                          StringExpert.format_timestamp(epoch_time))
                            try:
                                job_observation = job.job_observe(
                                    value=price,
                                    epoch_time=epoch_time,
                                    volume=volume,
                                    is_simulated=False,
                                    is_realtime=is_realtime)
                                row = job_observation  # job_observation_to_frame_row(volume, job_observation)
                                assert row is not None
                                job_changed_handler(job)
                            except DoubleObservationError as doe:
                                Log.w('epoch already in frame, will be ignored ({})', epoch_time)
                        except Exception as job_e:
                            raise Exception('Failed to feed row to job') from job_e
            except Exception as e:
                raise Exception('Failed to process row index {}'.format(epoch_time)) from e
            if is_row_processed:
                row_process_count += 1
        Log.d('...time spent processing {}/{} rows in time: {:.2f}s',
              row_process_count, frame.shape[0],
              time.time() - row_process_start_time)
        with pd.HDFStore(hdf5_filepath, mode='a') as h5:
            h5_process_start_time = time.time()
            start_observation_epoch = frame.index.values[0]
            for job in self.jobs:
                df_to_append = job.frame[job.frame.index >= start_observation_epoch]
                try:
                    h5.append(job.uid, df_to_append, format='table', data_columns=True)
                    row_count = h5.get_storer(job.uid).nrows
                    Log.d('...h5 key {}, row count is {}', job.uid, row_count)
                except Exception as append_error:
                    raise append_error
            Log.d('...time spent adding to h5: {:.2f}s', time.time() - h5_process_start_time)
        row_processing_time = time.time() - subset_process_start_time
        Log.d('...total time spent on subset: {:.2f}s ({:.2f}s per row)',
              row_processing_time, row_processing_time / row_process_count)
        return transaction_min_timestamp
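# A minimal usage sketch (not part of the original module): wiring GeneratorApp to a
# job_changed_handler. The handler body and the `version` object are illustrative
# assumptions; the real entry point may construct and pass them differently.
def on_job_changed(job):
    # React to a job whose frame just received a new observation, e.g. persist or publish it.
    Log.d('job updated: {}', job.uid)

if __name__ == '__main__':
    app = GeneratorApp(version)  # `version` is assumed to expose .major and .minor
    app.feed_jobs_forever(on_job_changed)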
def setUp(self):
    self.db = DatabaseGateway()
def __init__(self):
    Log.d('construct')
    self.db = DatabaseGateway()
import sys
import time
import io
import os
import hashlib
import requests
import hmac
import websocket
import simplejson as json
from applogging import Log
from core import AppConfig, OsExpert
from db import DatabaseGateway

sys.path.append('..')
AppConfig.initialize_in_file_dir(OsExpert.path_backstep(__file__))
file_path = os.path.realpath(__file__)
print(file_path)
db = DatabaseGateway()
datafetch_api_id = db.datafetch_api_id_by_handler_filepath(file_path)
print(datafetch_api_id)
exit()

def prettyJson(jsonData):
    return json.dumps(jsonData, indent=4, sort_keys=True)

publicKey = AppConfig.setting("BITCOINAVERAGE_PUBLIC_KEY")
secretKey = AppConfig.setting("BITCOINAVERAGE_SECRET_KEY")
url = "https://apiv2.bitcoinaverage.com/websocket/get_ticket"
print('public key: ' + publicKey)
print('secret key: ' + secretKey)  # debugging only: avoid logging secrets outside local runs
timestamp = int(time.time())
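# A hedged sketch of how the ticket request might be completed (not in the original file).
# The payload layout ("timestamp.publicKey"), the "X-signature" header and the "ticket"
# response field follow the commonly documented BitcoinAverage v2 signing scheme;
# treat all of them as assumptions rather than verified details of this codebase.
payload = '{}.{}'.format(timestamp, publicKey)
hex_hash = hmac.new(secretKey.encode(), msg=payload.encode(), digestmod=hashlib.sha256).hexdigest()
signature = '{}.{}'.format(payload, hex_hash)
response = requests.get(url, headers={'X-signature': signature})
print(prettyJson(response.json()))  # expected to contain a 'ticket' for the websocket handshake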
# (fragment) inside the handler's per-record loop:
transaction = {
    'datasource_id': datasource_id,
    'exchange_id': exchange_id,
    'amount': amount,
    'price': price,
    'currency_id': currency_id,
    'epoch_time': epochTime,
}
try:
    db.create_transaction(transaction)
    insertCount += 1
except DuplicateInsertException as e:
    duplicateCount += 1
os.remove(filepath)
Log.i('Done processing, insert count: {}, duplicate count: {}', insertCount, duplicateCount)

db = DatabaseGateway()
currencies = db.currencies_frame()
datasources = db.datasources_frame()
for i, job in enumerate(jobs):
    url = job['url']
    Log.i('Processing job {}/{}'.format(i + 1, len(jobs)))
    start_time = datetime.datetime.now()
    datasource_id = db.datasource_id_by_name(job['datasource_name'])
    exchange_id = db.exchange_id_by_name(job['provider_name'])
    currency_id = db.currency_id_by_code(job['currency_code'])  # lookup returns an id, so named accordingly
    retrieve(
        db,
        url,
        datasource_id,
        exchange_id,
        currency_id