async def check_for_alert_match(self):
    urls = [
        'https://twitter.com/CFTC',
        'https://twitter.com/sec_enforcement?lang=en',
        'https://twitter.com/ushouserep?lang=en'
    ]
    with open('ignore-lines.json', 'r') as f:
        strip_texts = json.load(f)
    Log.d('checking {} sources, ignoring {} lines..', len(urls), len(strip_texts))
    patterns = [
        r'.{,200}bitcoin.{,200}',
        r'.{,200}crypto.{,200}',
        r'.{,200}virtual currency.{,200}',
    ]
    for url in urls:
        async with aiohttp.ClientSession() as session:
            html_text = await self.__fetch(session, url)
            text = StringExpert.strip_tags(html_text)
            text = html.unescape(text)
            for strip_text in strip_texts:
                text = text.replace(strip_text, '')
            for pattern in patterns:
                match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
                if match is not None:
                    matched_line = match.group()
                    warning = 'Found pattern "{}" at url "{}" in line: {}'.format(pattern, url, matched_line)
                    Log.w(warning)
                    return True
    return False
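# A minimal standalone sketch of the keyword matching used above: each pattern
# grabs up to 200 characters of context on either side of the keyword and the
# search is case-insensitive. The sample text is made up for illustration.
import re

sample_text = 'Today the agency issued new guidance on Bitcoin futures trading.'
patterns = [
    r'.{,200}bitcoin.{,200}',
    r'.{,200}crypto.{,200}',
    r'.{,200}virtual currency.{,200}',
]
for pattern in patterns:
    match = re.search(pattern, sample_text, re.IGNORECASE | re.MULTILINE)
    if match is not None:
        print('pattern {!r} matched: {!r}'.format(pattern, match.group()))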
def __init__(self, h5_filepath, version):
    warnings.simplefilter('ignore', NaturalNameWarning)
    h5_inputfile = Path(h5_filepath)
    output_dirpath = AppConfig.setting('PREDICTOR_DATA_DIRPATH')
    self.h5_out_filepath = os.path.join(output_dirpath, h5_inputfile.name)
    h5_out_file = Path(self.h5_out_filepath)
    if h5_out_file.exists():
        Log.i('overwrite file?: {}', h5_out_file)
        if not OsExpert.prompt_confirm('File already exists, overwrite? {}'.format(h5_out_file)):
            Log.d('user aborted, exiting')
            exit()
        Log.w('removing file: {}', h5_out_file)
        os.remove(self.h5_out_filepath)
    self.predictors_map = {}
    base_filepath = output_dirpath
    with pd.HDFStore(h5_filepath, mode='r') as h5:
        keys = h5.keys()
        Log.i('h5 input keys: {}', keys)
        assert len(keys) == 1, 'hardcoded restriction on single key was violated'
        for key in keys:
            Log.i('row count for {}: {}', key, h5.get_storer(key).nrows)
            self.predictors_map[key] = [
                EnsemblePredictor(min_predict_generator_size=2000, max_train_size=5000)
            ]
    self.h5_watcher = H5FileWatcher(h5_filepath, self.handle_job_epoch, {'is_simulated': 0})
def retrieve(db, url, datasource_id, exchange_id, currency_id):
    temp_dirpath = AppConfig.setting('TEMP_DIRPATH')
    filepath = os.path.join(temp_dirpath, url.split('/')[-1])
    downloadFile(url, filepath)
    duplicateCount = 0
    insertCount = 0
    with gzip.open(filepath, 'rt') as f:
        Log.d('Processing csv file..')
        reader = csv.reader(f, delimiter=',', quotechar='|')
        for row in reader:
            epochTime = int(row[0])
            price = float(row[1])
            amount = float(row[2])
            transaction = {
                'datasource_id': datasource_id,
                'exchange_id': exchange_id,
                'amount': amount,
                'price': price,
                'currency_id': currency_id,
                'epoch_time': epochTime,
            }
            try:
                db.create_transaction(transaction)
                insertCount += 1
            except DuplicateInsertException:
                duplicateCount += 1
    os.remove(filepath)
    Log.i('Done processing, insert count: {}, duplicate count: {}', insertCount, duplicateCount)
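# A self-contained sketch of the gzipped CSV layout retrieve() expects: one trade
# per row as "epoch_time,price,amount". The sample file, path and values below are
# made up for illustration; the actual dump format of the source URL may differ.
import csv
import gzip

sample_path = '/tmp/sample_trades.csv.gz'
with gzip.open(sample_path, 'wt') as f:
    f.write('1514764800,13850.21,0.0425\n')
    f.write('1514764860,13851.00,0.1200\n')

with gzip.open(sample_path, 'rt') as f:
    for row in csv.reader(f, delimiter=',', quotechar='|'):
        epoch_time, price, amount = int(row[0]), float(row[1]), float(row[2])
        print(epoch_time, price, amount)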
async def __process_subscriber(self, index, subscriber):
    fail_count = 0
    response_file_prefix = subscriber.handler_filename
    while True:
        try:
            Log.i('invoking subscriber {}', subscriber.handler_filename)
            async for response_text in subscriber.subscribe():
                response_text_md5hash = StringExpert.md5hash(response_text)
                try:
                    epoch = int(time.time())
                    filepath = os.path.join(
                        self.data_response_dirpath,
                        '{}.{}.{}'.format(response_file_prefix, epoch, FetchApp.RESPONSE_EXTENSION)
                    )
                    with open(filepath, 'w') as file:
                        file.write(response_text)
                except Exception as e:
                    Log.e('Failed to save response to file, message: {}', e)
                Log.d('stored api response for subscriber {} (hash {})',
                      subscriber.handler_filename, response_text_md5hash)
        except Exception as e:
            fail_count += 1
            Log.e('failed to invoke subscriber {} ({} failures so far)',
                  subscriber.handler_filename, fail_count)
            stacktrace = traceback.format_exc()
            Log.d('exception stack:\n{}', stacktrace)
            Log.i('retrying in {} seconds..', self.retry_delay_seconds)
            await asyncio.sleep(self.retry_delay_seconds)
def __init__(self):
    super().__init__(__file__)
    Log.d('construct')
    self.dir_path = AppConfig.setting('DATA_RESPONSE_DIRPATH')
    self.store = Store()
    self.subscribers = subscribe.all()
    self.parse_util = ParseUtil(self.subscribers, self.store)
def __init__(self, min_predict_generator_size, max_train_size):
    super().__init__(predict_col='feature_rtrspc()_next_trend_pricefeature')
    assert max_train_size > min_predict_generator_size
    self.min_predict_generator_size = min_predict_generator_size
    self.max_train_size = max_train_size
    self.predictor = None
    Log.d('core count: {}', core_count)
def filter_simulated_observations(self, df):
    filtered_df = df[df['is_simulated'] != 1]
    dropped = df[~df.index.isin(filtered_df.index)]
    if len(dropped) > 0:
        Log.w('filtered out {} simulated frames', len(dropped))
    else:
        Log.d('no simulated frames found to filter out')
    return filtered_df
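# A minimal pandas sketch (with a made-up frame) of the is_simulated filter above:
# rows flagged as simulated are dropped, everything else is kept with its index.
import pandas as pd

df = pd.DataFrame(
    {'close': [100.0, 101.5, 102.0], 'is_simulated': [0, 1, 0]},
    index=[1514764800, 1514764860, 1514764920],
)
filtered_df = df[df['is_simulated'] != 1]
dropped = df[~df.index.isin(filtered_df.index)]
print(len(dropped), 'simulated row(s) removed')
print(filtered_df)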
def __init__(self):
    super().__init__(__file__, isToNotifyStartup=False)
    self.maxEmailReccurenceMinutes = float(
        AppConfig.setting('LOGWATCH_EMAIL_MAX_RECCURENCE_MINUTES'))
    self.triggerLines = ['ERROR', 'WARNING']
    Log.d('construct: {}', self.__dict__)
    self.matchCountSinceLastEmail = 0
    self.lastEmailDatetime = None
def tryAppNotifyByEmail(serviceName, message):
    if AppConfig.setting('IS_EMAIL_NOTIFICATION_ENABLED') != '1':
        Log.d('ignoring email request per configuration')
        return False
    alertEmail = AppConfig.setting('ALERT_EMAIL')
    hostName = socket.gethostname()
    return NetworkExpert.emailMaybe(
        alertEmail, alertEmail,
        '*** {}: {} ***'.format(hostName, serviceName),
        message)
def __init__(self):
    super().__init__(__file__)
    Log.d('construct')
    retry_delay_seconds = int(AppConfig.setting('DATAFETCH_API_RETRY_DELAY_SECONDS'))
    data_response_dirpath = AppConfig.setting('DATA_RESPONSE_DIRPATH')
    Log.d('data response dirpath is: {}', data_response_dirpath)
    self.retry_delay_seconds = retry_delay_seconds
    self.data_response_dirpath = data_response_dirpath
    OsExpert.ensure_abs_dirpath_exists(data_response_dirpath)
    self.subscribers = subscribe.all()
def create_predictor_from_csv(self):
    Log.i('initiating sagemaker model creation')
    role = AppConfig.setting('AWS_PREDICTOR_ROLE')
    bucket = 'cryptrade-sagemaker'
    custom_code_upload_location = 's3://{}/customcode/tensorflow_iris'.format(bucket)
    model_artifacts_location = 's3://{}/artifacts'.format(bucket)
    Log.d('training data will be uploaded to: {}', custom_code_upload_location)
    Log.d('training artifacts will be uploaded to: {}', model_artifacts_location)
    sess = sagemaker.Session()

    def upload_to_s3(channel, filepath, skip_if_name_and_size_matches=False):
        """Adapted from the SageMaker examples, e.g.:
        https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb"""
        file = Path(filepath)
        s3 = boto3.resource('s3')
        key = channel + '/' + file.name
        bucket_ref = s3.Bucket(bucket)
        objs = list(bucket_ref.objects.filter(Prefix=key))
        is_file_already_existing = len(objs) > 0 and objs[0].key == key
        if is_file_already_existing:
            if skip_if_name_and_size_matches:
                s3_client = boto3.client('s3')
                response = s3_client.head_object(Bucket=bucket, Key=key)
                local_size = file.stat().st_size
                remote_size = response['ContentLength']
                if remote_size == local_size:
                    Log.w('skipping upload as s3 key of same size ({:.2f}kb) already exists: {}',
                          local_size / 1000, key)
                    return
            Log.w('overwriting existing s3 key: {}', key)
        with open(filepath, "rb") as data:
            s3.Bucket(bucket).put_object(Key=key, Body=data)

    s3_data_folder = 'data'
    upload_to_s3(s3_data_folder, self.train_filepath, True)
    upload_to_s3(s3_data_folder, self.test_filepath, True)
    upload_to_s3(s3_data_folder, self.meta_filepath)
    estimator = TensorFlow(
        entry_point='aws_dnn_predictor_entry.py',
        role=role,
        output_path=model_artifacts_location,
        code_location=custom_code_upload_location,
        train_instance_count=1,
        train_instance_type='ml.c5.xlarge',
        training_steps=1000,
        evaluation_steps=100
    )
    train_data_location = 's3://{}/{}'.format(bucket, s3_data_folder)
    Log.i('fitting train data: {}', train_data_location)
    estimator.fit(train_data_location)
    Log.i('deploying model')
    deploy_start = datetime.now()
    predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.t2.medium')
    deploy_end = datetime.now()
    Log.i('deployed predictor in {}s, endpoint is:\n{}',
          deploy_end - deploy_start, predictor.endpoint)
    self.predictor = predictor
def __init__(self, h5_filepath, row_handler, contraints_dict=None):
    self.handle_event = Event()
    self.h5_filepath = h5_filepath
    self.handle_count = 0
    self.job_frames = {}
    self.last_handle_count = None
    self.row_handler = row_handler
    self.contraints_clause = '' if contraints_dict is None else ' '.join(
        'and {}={}'.format(k, v) for k, v in contraints_dict.items())
    Log.d('constraints clause: {}', self.contraints_clause)
    assert row_handler
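# A small sketch of the constraints clause built above: each key/value pair in the
# dict becomes an "and key=value" fragment appended to the HDF5 where-clause.
contraints_dict = {'is_simulated': 0}
contraints_clause = '' if contraints_dict is None else ' '.join(
    'and {}={}'.format(k, v) for k, v in contraints_dict.items())
print(contraints_clause)  # -> "and is_simulated=0"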
def downloadFile(url, filepath):
    if url is None:
        raise ValueError('parameter "url" not specified')
    if filepath is None:
        raise ValueError('parameter "filepath" not specified')
    Log.d('Downloading to path {}: {}'.format(filepath, url))
    r = requests.get(url, stream=True)  # NOTE the stream=True parameter
    with open(filepath, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
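# A hedged usage sketch for downloadFile: the URL and local path below are made up
# for illustration; the response is streamed to disk in 1 KB chunks.
downloadFile(
    'https://example.com/exchange_trades.csv.gz',  # hypothetical download URL
    '/tmp/exchange_trades.csv.gz',
)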
def print_acc(self, df):
    Log.d('begin acc calc ======')
    y_predict_colname = 'prediction_ensmbl_next_trend_feature'  # 'prediction_awsdnn_next_trend'
    y_true_colname = 'feature_rtrspc()_next_trend_pricefeature'
    df = df[[y_predict_colname, y_true_colname]]
    filtered = df.dropna(how='any')
    Log.d('acc source frame:\n{}', filtered)
    Log.d('dropped {}/{} rows where either the predicted or the true value was unspecified',
          len(df) - len(filtered), len(df))
    y_predict = filtered[y_predict_colname]
    y_true = filtered[y_true_colname]
    score = accuracy_score(y_true, y_predict, normalize=True)
    Log.d('accuracy: {}', score)
    Log.d('===== end acc calc')
def __predict(self, df):
    max_prediction_count = 100
    if self.predict_count >= max_prediction_count:
        Log.w('prediction limit reached ({}), exiting', self.predict_count)
        exit()
    assert len(df) == 1
    X_all, y_all = self.frame_to_ml_inputs(df)
    predict_row = X_all.iloc[0]
    Log.d('predicting based on {} values:\n{}',
          len(predict_row.values), predict_row.squeeze().sort_index())
    prediction_response = self.predictor.predict(predict_row.values)
    prediction = self.sagemaker_response_highest_score_label(prediction_response)
    self.predict_count += 1
    return prediction
def unparsed_datafetch_api_responses_frame(self, min_id=0, limit=100):
    sql = """
        SELECT {0}.*
        FROM {0}
        LEFT OUTER JOIN {1}
            ON {1}.source_md5hash = {0}.response_md5hash
        WHERE {1}.source_md5hash IS NULL
            AND {0}.id >= {2}
        ORDER BY {0}.id
        LIMIT {3}
        """.format('datafetch_api_response', 'transaction', min_id, limit)
    Log.d('executing:\n{}', sql)
    sys.stdout.flush()
    return self.__query_frame(sql)
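# A sketch of what the query above renders to for the default min_id=0, limit=100:
# a LEFT OUTER JOIN anti-join that keeps only datafetch_api_response rows with no
# matching transaction, i.e. api responses that have not been parsed yet.
rendered_sql = """
    SELECT datafetch_api_response.*
    FROM datafetch_api_response
    LEFT OUTER JOIN transaction
        ON transaction.source_md5hash = datafetch_api_response.response_md5hash
    WHERE transaction.source_md5hash IS NULL
        AND datafetch_api_response.id >= 0
    ORDER BY datafetch_api_response.id
    LIMIT 100
    """
print(rendered_sql)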
def watch_continuously(self, watch_interval_seconds):
    Log.i('continuous watching activated with interval of {} seconds', watch_interval_seconds)
    consecutive_error_count = 0
    while True:
        try:
            self.__verify_datafetch_apis_write_frequency()
            consecutive_error_count = 0
        except Exception as e:
            consecutive_error_count += 1
            Log.e('failure during watcher check ({} consecutive errors)', consecutive_error_count)
            stacktrace = OsExpert.stacktrace()
            Log.d('stacktrace:\n{}', stacktrace)
        time.sleep(watch_interval_seconds)
def email_maybe(self, header, message):
    now = datetime.now()
    if self.lastEmailDatetime is not None:
        minutesSinceLastEmail = (now - self.lastEmailDatetime).total_seconds() / 60.0
        if minutesSinceLastEmail < self.maxEmailReccurenceMinutes:
            timeLeftMinutes = int(self.maxEmailReccurenceMinutes - minutesSinceLastEmail)
            Log.d('Aborting email notification ({}+ minutes left in window)', timeLeftMinutes)
            return
    self.lastEmailDatetime = now
    self.matchCountSinceLastEmail = 0
    NetworkExpert.tryAppNotifyByEmail(header, message)
def frame(mode, filename, from_epoch, to_epoch, filterInNth, agents, format_as_image):
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    filepath = os.path.join(dirpath, filename)
    if from_epoch is None:
        from_epoch = to_epoch - 60 * 60 * 24 * 7
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        row_count = storer.nrows
        Log.d(row_count)
        first_epoch = pd.read_hdf(h5, key, start=0, stop=1, columns=[]).index.values[0]
        last_epoch = pd.read_hdf(h5, key, start=row_count - 1, stop=row_count, columns=[]).index.values[0]
        column_names = [attr for attr in storer.attrs.data_columns]
        plot_html = h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents, format_as_image)
        if mode == 'plot_only':
            return plot_html
        feature_columns = set(
            a.split('_')[1] for a in column_names if a.startswith('feature_'))
        feature_names = [c.split('(')[0] for c in feature_columns]
        agent_map = {
            fn: [c for c in feature_columns if c.startswith(fn)]
            for fn in feature_names
        }
        return render_template(
            'frame.html',
            style=style,
            plothtml=plot_html,
            filename=filename,
            from_epoch=from_epoch,
            to_epoch=to_epoch,
            first_epoch=first_epoch,
            last_epoch=last_epoch,
            min_epoch=1514764800,  # min epoch is 2018-01-01
            max_epoch=int(time.time()),
            agent_map=sorted(agent_map.items()),
            job_uid=key,
            frame_info_html=json2html.convert(json={
                'row count': row_count,
                'columns': column_names
            }))
def process_nonparsed_api_responses_full(self, sleep_seconds=0):
    Log.i('initiating continuous parsing of api responses with subset sleep interval: {} seconds',
          sleep_seconds)
    try:
        min_id = -1
        next_min_id = 0
        while next_min_id > min_id:
            min_id = next_min_id
            parse_count = 0
            next_min_id = self.process_nonparsed_api_responses_subset(next_min_id=min_id)
            time.sleep(sleep_seconds)
    except Exception as e:
        raise Exception('Failed to process nonparsed api responses') from e
    transaction_count = self.store.transaction_count()
    Log.d('no more api responses to parse, transaction count is now {}', transaction_count)
def __init__(self, version):
    super().__init__(__file__)
    self.window_size = 15
    self.interval_seconds = [15 * 60]  # 15 minutes
    self.contruct_time = time.time()
    self.version = version
    self.sleep_seconds = 1  # must be low enough to eventually produce an empty result set -> realtime
    self.transaction_min_timestamp = int(
        AppConfig.setting('GENERATOR_TRANSACTION_MIN_TIMESTAMP'))
    self.data_dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    Log.d('construct: {}', self.__dict__)
    self.db = DatabaseGateway()
    max_history_minutes = 10 * 24 * 60  # max(self.minute_intervals)
    self.from_currency_ids = []
    self.to_currency_ids = []
    self.run_config = self.read_run_config()
    self.jobs = list(self.__jobs_iterate(max_history_minutes, self.run_config))
    Log.i('count of generator jobs: {}', len(self.jobs))
def handle_job_epoch(self, jobuid, df, start_index):
    trade_fee = float64(.25 / 100)
    min_capital = self.initial_capital * trade_fee * 10
    print(start_index)
    print(len(df))
    try:
        assert jobuid == '/bitcoinaverage_multiple_global_ETH_USD_900', 'unexpected job id'
        new_df = df[start_index:]
        for epoch, row in new_df.iterrows():
            action = row[PREDICT_ACTION]
            coin_price = row['close']
            if self.start_value is None:
                self.start_value = self.current_value(coin_price)
            if not isnan(action):
                print('coin price ', coin_price, ', capital ', self.capital)
                if action == FeatureValue.BUY:
                    coin_transaction_count = (1 - trade_fee) * (self.capital - min_capital) / coin_price
                    if coin_transaction_count > 0:
                        print('BUYING coins: ', coin_transaction_count)
                        cost = coin_transaction_count * coin_price
                        fee = cost * trade_fee
                        assert self.capital >= cost + fee, '{} >= {} + {} = {}'.format(
                            self.capital, cost, fee, cost + fee)
                        self.capital -= cost
                        self.coins += coin_transaction_count
                        self.pay_fee(cost)
                elif action == FeatureValue.SELL:
                    fee = min(self.coins * coin_price * trade_fee, self.capital)
                    coin_transaction_count = fee / (coin_price * trade_fee)
                    if coin_transaction_count > 0 and self.coins >= coin_transaction_count:
                        print('SELLING coins: {}'.format(coin_transaction_count))
                        gain = coin_transaction_count * coin_price
                        self.capital += gain
                        self.coins -= coin_transaction_count
                        self.pay_fee(gain)
                    else:
                        Log.d('NOT ENOUGH COINS TO SELL! {} at {}', coin_transaction_count, fee)
            net_worth = self.current_value(coin_price)
    except Exception as e:
        raise Exception('Failed to execute on new job epoch') from e
    print(len(df))
    print(df[PREDICT_ACTION].value_counts())
    print('done')
    sys.stdout.flush()
def __run(self):
    Log.d('Watching file: {}', self.h5_filepath)
    thread = FileWatcher(self.h5_filepath, modified=self.handle_change).run_async()
    try:
        while self.handle_event.wait():
            if self.last_handle_count is not None:
                jump_count = self.handle_count - self.last_handle_count
                if jump_count > 1:
                    Log.w('handle count jumped by {} (more than one) since the last processing', jump_count)
            self.last_handle_count = self.handle_count
            self.process_h5()
            self.handle_event.clear()
    finally:
        Log.w('run loop broken, unwatching file: {}', self.h5_filepath)
        thread.stop()
        thread.join()
def process(self, epoch, df):
    if df.empty:
        Log.d('skipping processing of empty dataset')
        return
    r_index = df.index.get_loc(epoch)
    if self.predictor is not None:
        row_frame = df[r_index:r_index + 1]
        return self.__predict(row_frame)
    not_enough_predictor_data = r_index + 1 < self.min_predict_generator_size
    if not_enough_predictor_data:
        return
    Log.d('initiating predictor construction at index {}, frame length {}', r_index, len(df))
    predictor = self.predictor_from_config_maybe()
    if predictor is not None:
        self.predictor = predictor
        Log.i('existing predictor endpoint loaded: {}', predictor.endpoint)
        return
    train_df = df[:r_index + 1]
    Log.i('at index {}, detected data of adequate length {}, writing csv: {}',
          r_index, len(train_df), self.csv_filepath)
    self.write_csv(train_df)
    return None
def plot(title, second_count, frame, ax, is_image, label_connect, filter_in_nth, cp):
    values = FeatureBase.derive_plot_values(title, second_count, frame)
    if values is None:
        return None
    latest_min = values['latest'].min()
    mult_factor = values['latest'].max() - latest_min
    offset_y = latest_min - mult_factor
    values[cp + 'd_slow'] = values[cp + 'd_slow'] * mult_factor + offset_y
    values[cp + 'd_fast'] = values[cp + 'd_fast'] * mult_factor + offset_y
    values[cp + 'k'] = values[cp + 'k'] * mult_factor + offset_y
    filtered = values[values.index % filter_in_nth == 0]
    assert len(filtered) > 0
    Log.d(frame[cp + action_cf].value_counts())
    indices = filtered['index'].tolist()
    df_k = filtered[cp + 'k']
    ax.plot(indices, df_k, color='orange', alpha=0.9, zorder=-5)
    df_d_slow = filtered[cp + 'd_slow']
    ax.plot(indices, df_d_slow, color='lightblue', alpha=0.9)
    for y in [0, lower_k, upper_k, 1]:
        ax.plot([indices[0], indices[-1]], [y * mult_factor + offset_y] * 2,
                color='white', dashes=[6, 2], alpha=0.5, zorder=-10)
    df_buy = values[values[cp + action_cf] == ActionFeature.BUY]
    ax.scatter(df_buy['index'], df_buy[cp + 'd_slow'], color='green', s=70, zorder=10, alpha=0.7)
    df_sell = values[values[cp + action_cf] == ActionFeature.SELL]
    ax.scatter(df_sell['index'], df_sell[cp + 'd_slow'], color='red', s=70, zorder=10, alpha=0.7)
def __init__(self, datasource, exchange, from_currency, to_currency,
             interval_second, features, uid):
    assert isinstance(datasource, Datasource)
    assert isinstance(exchange, Exchange)
    assert isinstance(from_currency, Currency)
    assert isinstance(to_currency, Currency)
    assert isinstance(features, list)
    assert isinstance(interval_second, int)
    self.datasource = datasource
    self.exchange = exchange
    self.from_currency = from_currency
    self.to_currency = to_currency
    self.features = features  # trend agent must be first!
    self.interval_second = interval_second
    self.uid = uid
    self.frame = None
    self.interval_stat = None
    self.reserved_cols = ['time', 'volume', 'age', 'is_simulated', 'is_realtime']
    self.feature_reserved_cols = ['time']
    Log.d('generator job created with features: {}',
          sorted([f.col_prefix for f in self.features]))
def process_api_response_file(self, filepath, subscriber, datafetch_api_response=None):
    db = self.store
    filename = os.path.basename(filepath)
    if not os.path.isfile(filepath) or not filename.startswith(subscriber.handler_filename):
        return False
    receiveTimestamp = int(ParseUtil.extractTimestampText(filename))
    with open(filepath, 'r') as disk_file:
        response_text = disk_file.read()
    response_text_md5hash = StringExpert.md5hash(response_text)
    if datafetch_api_response is None:
        datafetch_api_response = ParseUtil.partial_datafetch_api_response(subscriber, db)
    datafetch_api_response = {
        **datafetch_api_response,
        'response': response_text,
        'response_md5hash': response_text_md5hash,
        'epoch_receive_time': receiveTimestamp,
        'response_filename': filename
    }
    transaction = None
    try:
        datafetch_api_response_id = db.create_datafetch_api_response(datafetch_api_response)
    except DuplicateInsertException:
        Log.d('db rejected api_response_id as a duplicate: {}', response_text_md5hash)
        return False
    except Exception as e:
        Log.e('Failed to store api_response ({})', response_text_md5hash)
        raise e
    ParseUtil.parse_and_persist_as_transaction_maybe(datafetch_api_response, subscriber, db)
    return True
def datafetch_api_id_by_handler_filepath(self, handler_filepath,
                                         datafetch_api_ids=None,
                                         create_if_nonexisting=False):
    table_name = 'datafetch_api'
    col_name = 'handler_filepath'
    scalar_col_name = 'id'
    if create_if_nonexisting:
        result = self.__scalar_by_unique_col_value(
            table_name, col_name, handler_filepath, scalar_col_name,
            frame=datafetch_api_ids, nonexisting_is_error=False)
        if result is not None:
            return result
        handler_filename = os.path.basename(handler_filepath)
        result_endpoint_prefix = AppConfig.setting('RESULT_ENDPOINT_PREFIX')
        result_endpoint = '{}{}'.format(result_endpoint_prefix, handler_filename)
        new_datafetch_api_id = self.create_datafetch_api({
            'handler_filepath': handler_filepath,
            'result_endpoint': result_endpoint,
            'result_frequency_seconds': 30
        })
        Log.d('created datafetch api id {} for handler filepath "{}"',
              new_datafetch_api_id, handler_filepath)
    return self.__scalar_by_unique_col_value(
        table_name, col_name, handler_filepath, scalar_col_name,
        frame=datafetch_api_ids)
def feed_jobs_forever(self, job_changed_handler):
    assert job_changed_handler is not None
    sleep_seconds = self.sleep_seconds
    transaction_min_timestamp = self.transaction_min_timestamp
    start_transaction_min_timestamp = transaction_min_timestamp
    data_dirpath = self.data_dirpath
    start_time = time.time()
    Log.i('processing transactions, sleep interval {}s, starting from epoch {} ({})',
          sleep_seconds, transaction_min_timestamp,
          StringExpert.format_timestamp(transaction_min_timestamp))
    to_fetch_count = self.db.transaction_count(transaction_min_timestamp)
    Log.d('transaction count since {} ({}): {}',
          transaction_min_timestamp,
          StringExpert.format_timestamp(transaction_min_timestamp),
          to_fetch_count)
    pd.set_option('io.hdf.default_format', 'table')
    hdf5_filename = '{}_{}_{}.h5'.format(
        self.version.major, self.version.minor,
        datetime.fromtimestamp(start_time).strftime('%Y%m%d_%H%M%S'))
    hdf5_filepath = path.join(data_dirpath, hdf5_filename)
    Log.i('hdf5 output filepath is: \n{}', hdf5_filepath)
    set_size = 1000
    fetch_count = 0
    plot_time = time.time()
    is_realtime = False
    while True:
        try:
            next_transaction_min_timestamp = self.process_transaction_subset(
                transaction_min_timestamp, set_size, hdf5_filepath,
                job_changed_handler, is_realtime)
            if next_transaction_min_timestamp is None:
                Log.d('nothing to process, waiting..')
                is_realtime = True  # TODO: empty polling is perhaps not the best indicator of the switch to realtime
                time.sleep(sleep_seconds)
            else:
                assert next_transaction_min_timestamp > transaction_min_timestamp, \
                    'next minimum timestamp was not greater than the current timestamp'
                transaction_min_timestamp = next_transaction_min_timestamp
                fetch_count += set_size
                percentage = 100 * fetch_count / to_fetch_count
                current_time = time.time()
                Log.d('processed {}/{}, {}%, spent {} on the period {} ({}) to {} ({})',
                      fetch_count, to_fetch_count, int(percentage),
                      Timespan.from_seconds(int(current_time - start_time)).as_string(),
                      StringExpert.format_timestamp(start_transaction_min_timestamp),
                      start_transaction_min_timestamp,
                      StringExpert.format_timestamp(transaction_min_timestamp),
                      transaction_min_timestamp)
        except Exception as e:
            raise Exception('Failed to process transaction subset') from e
def sagemaker_response_highest_score_label(self, prediction_response):
    Log.d('parsing response: {}', prediction_response)
    classifications = prediction_response['result']['classifications']
    assert len(classifications) == 1
    classification = classifications[0]
    classes = classification['classes']
    Log.d('parsing classes: {}', classes)
    label_scores = {c['score']: c['label'] for c in classes if 'score' in c}
    assert len(label_scores) > 0
    scores = sorted(label_scores.keys(), reverse=True)
    assert all(scores[i] >= scores[i + 1] for i in range(len(scores) - 1))
    win_score = scores[0]
    win_label = int(label_scores[win_score])
    Log.d('winner is \'{}\' among scores: {}', win_label, label_scores)
    return win_label
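# A hedged sketch of the response shape the parser above expects (the values are
# made up): a TensorFlow Serving classify-style payload with a single classification
# holding per-label scores; the label with the highest score wins.
prediction_response = {
    'result': {
        'classifications': [
            {
                'classes': [
                    {'label': '0', 'score': 0.12},
                    {'label': '1', 'score': 0.71},
                    {'label': '2', 'score': 0.17},
                ]
            }
        ]
    }
}
classes = prediction_response['result']['classifications'][0]['classes']
label_scores = {c['score']: c['label'] for c in classes if 'score' in c}
win_score = sorted(label_scores.keys(), reverse=True)[0]
print(int(label_scores[win_score]))  # -> 1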