Example #1
	def __init__(self):
		super().__init__(__file__)
		Log.d('construct')
		retry_delay_seconds = int(AppConfig.setting('DATAFETCH_API_RETRY_DELAY_SECONDS'))
		data_response_dirpath = AppConfig.setting('DATA_RESPONSE_DIRPATH')
		Log.d('data response dirpath is: {}', data_response_dirpath)
		self.retry_delay_seconds = retry_delay_seconds
		self.data_response_dirpath = data_response_dirpath
		OsExpert.ensure_abs_dirpath_exists(data_response_dirpath)
		self.subscribers = subscribe.all()
Example #2
 def __init__(self):
     super().__init__(__file__)
     Log.d('construct')
     self.dir_path = AppConfig.setting('DATA_RESPONSE_DIRPATH')
     self.store = Store()
     self.subscribers = subscribe.all()
     self.parse_util = ParseUtil(self.subscribers, self.store)
Example #3
	def __init__(self, h5_filepath, version):
		warnings.simplefilter('ignore', NaturalNameWarning)
		h5_inputfile = Path(h5_filepath)
		output_dirpath = AppConfig.setting('PREDICTOR_DATA_DIRPATH')
		self.h5_out_filepath = os.path.join(output_dirpath, h5_inputfile.name)
		h5_out_file = Path(self.h5_out_filepath)
		if h5_out_file.exists():
			Log.i('overwrite file?: {}', h5_out_file)
			if not OsExpert.prompt_confirm('File already exists, overwrite? {}'.format(h5_out_file)):
				Log.d('user aborted, exiting')
				exit()
			Log.w('removing file: {}', h5_out_file)
			os.remove(self.h5_out_filepath)
		self.predictors_map = {}
		base_filepath = output_dirpath
		with pd.HDFStore(h5_filepath, mode='r') as h5:
			keys = h5.keys()
			Log.i('h5 input keys: {}', keys)
			assert len(keys) == 1, 'hardcoded restriction on single key was violated'
			for key in keys:
				Log.i('row count for {}: {}', key, h5.get_storer(key).nrows)
				self.predictors_map[key] = [
					EnsemblePredictor(min_predict_generator_size=2000, max_train_size=5000)
				]
		self.h5_watcher = H5FileWatcher(h5_filepath, self.handle_job_epoch, {'is_simulated': 0})
Example #4
def frame_info(filename):
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    filepath = os.path.join(dirpath, filename)
    result = {}
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        time_column_names = [
            attr for attr in storer.attrs.data_columns
            if attr.endswith(')_time')
        ]
        frame = pd.read_hdf(h5, key)
        active_cols = [
            attr for attr in storer.attrs.data_columns
            if attr.endswith('_active')
        ]
        row_count = len(frame)
        result[key] = {
            'row count': row_count,
            'time cols sum': json2html.convert(frame[time_column_names].sum().to_json()),
            'active_ratios': json2html.convert((frame[active_cols].sum() / row_count).to_json()),
            'latest_row': json2html.convert(frame.iloc[-1].to_json()),
            'isnull-sum': json2html.convert(frame.isnull().sum().to_json()),
            'describe': json2html.convert(frame.describe().to_json()),
        }
    return render_template('frame-info.html',
                           style=style,
                           frame_info_map=result)
Example #5
def retrieve(db, url, datasource_id, exchange_id, currency_id):
	temp_dirpath = AppConfig.setting('TEMP_DIRPATH')
	filepath = os.path.join(temp_dirpath, url.split('/')[-1])
	downloadFile(url, filepath)
	duplicateCount = 0
	insertCount = 0
	with gzip.open(filepath, 'rt') as f:
		Log.d('Processing csv file..')
		reader = csv.reader(f, delimiter=',', quotechar='|')
		for row in reader:
			timeStr = row[0]
			epochTime = int(timeStr)
			priceStr = row[1]
			price = float(priceStr)
			amountStr = row[2]
			amount = float(amountStr)
			transaction = {
				'datasource_id': datasource_id,
				'exchange_id': exchange_id,
				'amount': amount,
				'price': price,
				'currency_id': currency_id,
				'epoch_time': epochTime,
			}
			try:
				db.create_transaction(transaction)
				insertCount += 1
			except DuplicateInsertException:
				duplicateCount += 1
	os.remove(filepath)
	Log.i('Done processing, insert count: {}, duplicate count: {}', insertCount, duplicateCount)
Example #6
 def __init__(self):
     super().__init__(__file__, isToNotifyStartup=False)
     self.maxEmailReccurenceMinutes = float(
         AppConfig.setting('LOGWATCH_EMAIL_MAX_RECCURENCE_MINUTES'))
     self.triggerLines = ['ERROR', 'WARNING']
     Log.d('construct: {}', self.__dict__)
     self.matchCountSinceLastEmail = 0
     self.lastEmailDatetime = None
Example #7
 def get_or_create_hdf5(self):
     filepath = AppConfig.setting('DATASTORE_HDF5_FILEPATH')
     file = Path(filepath)
     exists = file.exists()
     if exists:
         assert file.is_file(), 'hdf5 filepath exists but is not a file'
     pd.set_option('io.hdf.default_format', 'table')
     hdf5 = pd.HDFStore(filepath, append=True)
     hdf5.swmr_mode = True  # may or may not have an effect
     return hdf5
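A minimal usage sketch of the returned store; the store variable, key, and frame are illustrative assumptions. Because the default format is forced to 'table', the same file can be appended to incrementally:

hdf5 = store.get_or_create_hdf5()    # 'store' is assumed to be the object exposing this method
hdf5.append('/transactions', frame)  # key and frame are hypothetical
hdf5.close()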
Example #8
	async def __socket_subscribe(self):
		try:
			publicKey = AppConfig.setting('BITCOINAVERAGE_PUBLIC_KEY')
			secretKey = AppConfig.setting('BITCOINAVERAGE_SECRET_KEY')
			timestamp = int(time.time())
			payload = '{}.{}'.format(timestamp, publicKey)
			hex_hash = hmac.new(secretKey.encode(), msg=payload.encode(), digestmod=hashlib.sha256).hexdigest()
			signature = '{}.{}'.format(payload, hex_hash)
			ticket_url = 'https://apiv2.bitcoinaverage.com/websocket/get_ticket'
			ticket_header = {'X-signature': signature}
			async with aiohttp.ClientSession() as session:
				async with session.get(ticket_url, headers=ticket_header) as resp:
					response_text = await resp.text()
					Log.d('received ticket response: {}', response_text)
					if response_text == 'Client limit reached for api key apikey':
						raise Exception(response_text)
					response_json = json.loads(response_text)
					ticket = response_json['ticket']
			Log.d('ticket received: {}', ticket)
			url = 'wss://apiv2.bitcoinaverage.com/websocket/ticker?public_key={}&ticket={}'.format(publicKey, ticket)
			subscribe_message = json.dumps({
					'event': 'message',
					'data': {
						'operation': 'subscribe',
						'options': {
							'currency': '{}{}'.format(self.from_currency_code, self.to_currency_code),
							'market': self.market_name()
						}
					}
				})
			Log.d('sending subscribe message: {}', subscribe_message)
			async with aiohttp.ClientSession() as ws_session:
				async with ws_session.ws_connect(url) as ws:
					await ws.send_str(subscribe_message)
					async for msg in ws:
						if msg.type == aiohttp.WSMsgType.CLOSED:
							raise Exception('Socket presumed invalidated as received message was of aiohttp type "closed"')
						if msg.type == aiohttp.WSMsgType.ERROR:
							raise Exception('Socket presumed invalidated as received message was of aiohttp type "error"')
						yield msg.data
		except Exception as e:
			raise Exception('Failed to subscribe via socket') from e
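Since __socket_subscribe is an async generator, a consumer defined on the same class would drain it with async for. A minimal sketch, assuming a hypothetical handle_rate method that is not part of the source:

	async def run(self):
		# Hypothetical consumer; handle_rate is an assumption, not in the original snippet.
		async for raw_message in self.__socket_subscribe():
			self.handle_rate(raw_message)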
Example #9
 def __init__(self, version):
     super().__init__(__file__)
     self.window_size = 15
     self.interval_seconds = [15 * 60]  # 15 minutes
     self.construct_time = time.time()
     self.version = version
     self.sleep_seconds = 1  # must be low enough that the result set eventually comes up empty, i.e. we catch up to realtime
     self.transaction_min_timestamp = int(
         AppConfig.setting('GENERATOR_TRANSACTION_MIN_TIMESTAMP'))
     self.data_dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
     Log.d('construct: {}', self.__dict__)
     self.db = DatabaseGateway()
     max_history_minutes = 10 * 24 * 60  # 10 days
     self.from_currency_ids = []
     self.to_currency_ids = []
     self.run_config = self.read_run_config()
     self.jobs = list(
         self.__jobs_iterate(max_history_minutes, self.run_config))
     Log.i('count of generator jobs: {}', len(self.jobs))
Example #10
	def create_predictor_from_csv(self):
			Log.i('initiating sagemaker model creation')
			role = AppConfig.setting('AWS_PREDICTOR_ROLE')
			bucket = 'cryptrade-sagemaker'
			custom_code_upload_location = 's3://{}/customcode/tensorflow_iris'.format(bucket)
			model_artifacts_location = 's3://{}/artifacts'.format(bucket)
			Log.d('training data will be uploaded to: {}', custom_code_upload_location)
			Log.d('training artifacts will be uploaded to: {}', model_artifacts_location)
			sess = sagemaker.Session()
			def upload_to_s3(channel, filepath, skip_if_name_and_size_matches=False):
				"""From SM examples. Like here: https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb"""
				file = Path(filepath)
				s3 = boto3.resource('s3')
				key = channel + '/' + file.name
				bucket_ref = s3.Bucket(bucket)
				objs = list(bucket_ref.objects.filter(Prefix=key))
				is_file_already_existing = len(objs) > 0 and objs[0].key == key
				if is_file_already_existing:
					if skip_if_name_and_size_matches:
						s3_client = boto3.client('s3')
						response = s3_client.head_object(Bucket=bucket, Key=key)
						local_size = file.stat().st_size
						remote_size = response['ContentLength']
						if remote_size == local_size:
							Log.w('skipping upload as s3 key of same size ({:.2f}kb) already exists: {}', local_size/1000, key)
							return
					Log.w('overwriting existing s3 key: {}', key)
				with open(filepath, "rb") as data:
					s3.Bucket(bucket).put_object(Key=key, Body=data)
			s3_data_folder = 'data'
			upload_to_s3(s3_data_folder, self.train_filepath, True)
			upload_to_s3(s3_data_folder, self.test_filepath, True)
			upload_to_s3(s3_data_folder, self.meta_filepath)
			estimator = TensorFlow(
				entry_point='aws_dnn_predictor_entry.py',
				role=role,
				output_path=model_artifacts_location,
				code_location=custom_code_upload_location,
				train_instance_count=1,
				train_instance_type='ml.c5.xlarge',
				training_steps=1000,
				evaluation_steps=100
				)
			train_data_location = 's3://{}/{}'.format(bucket, s3_data_folder)
			Log.i('fitting train data: {}', train_data_location)
			estimator.fit(train_data_location)
			Log.i('deploying model')
			deploy_start = datetime.now()
			predictor = estimator.deploy(
				initial_instance_count=1,
				instance_type='ml.t2.medium')
			deploy_end = datetime.now()
			Log.i('deployed predictor in {}s, endpoint is:\n{}', deploy_end - deploy_start, predictor.endpoint)
			self.predictor = predictor
Example #11
	def test_file_contents_md5hash(self):
		tempFilepath = os.path.join(
			AppConfig.setting('TEMP_DIRPATH'), 
			'file.txt'
			)
		with open(tempFilepath,'w') as f:
			f.write('sample text')
		self.assertEqual(
			OsExpert.md5(tempFilepath),
			'70ee1738b6b21e2c8a43f3a5ab0eee71'
			)
		os.remove(tempFilepath)
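For reference, a chunked file-hashing helper of the kind OsExpert.md5 presumably wraps; a minimal sketch, not the library's actual implementation:

import hashlib

def md5_of_file(filepath, chunk_size=65536):
	# Stream the file in chunks so large files need not fit in memory.
	digest = hashlib.md5()
	with open(filepath, 'rb') as f:
		for chunk in iter(lambda: f.read(chunk_size), b''):
			digest.update(chunk)
	return digest.hexdigest()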
Example #12
def frame(mode, filename, from_epoch, to_epoch, filterInNth, agents,
          format_as_image):
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    filepath = os.path.join(dirpath, filename)
    if from_epoch is None:
        from_epoch = to_epoch - 60 * 60 * 24 * 7
    with pd.HDFStore(filepath, mode='r') as h5:
        key = h5.keys()[0]  # TODO: always select first?
        storer = h5.get_storer(key)
        row_count = storer.nrows
        Log.d(row_count)
        first_epoch = pd.read_hdf(h5, key, start=0, stop=1,
                                  columns=[]).index.values[0]
        last_epoch = pd.read_hdf(h5,
                                 key,
                                 start=row_count - 1,
                                 stop=row_count,
                                 columns=[]).index.values[0]
        column_names = list(storer.attrs.data_columns)
        plot_html = h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents,
                               format_as_image)
        if mode == 'plot_only':
            return plot_html
        feature_columns = {
            a.split('_')[1] for a in column_names if a.startswith('feature_')
        }
        feature_names = [c.split('(')[0] for c in feature_columns]
        agent_map = {
            fn: [c for c in feature_columns if c.startswith(fn)]
            for fn in feature_names
        }
        return render_template(
            'frame.html',
            style=style,
            plothtml=plot_html,
            filename=filename,
            from_epoch=from_epoch,
            to_epoch=to_epoch,
            first_epoch=first_epoch,
            last_epoch=last_epoch,
            min_epoch=1514764800,  # 2018-01-01
            max_epoch=int(time.time()),
            agent_map=sorted(agent_map.items()),
            job_uid=key,
            frame_info_html=json2html.convert(json={
                'row count': row_count,
                'columns': column_names
            }))
Example #13
 def datafetch_api_id_by_handler_filepath(self,
                                          handler_filepath,
                                          datafetch_api_ids=None,
                                          create_if_nonexisting=False):
     table_name = 'datafetch_api'
     col_name = 'handler_filepath'
     scalar_col_name = 'id'
     if create_if_nonexisting:
         result = self.__scalar_by_unique_col_value(
             table_name,
             col_name,
             handler_filepath,
             scalar_col_name,
             frame=datafetch_api_ids,
             nonexisting_is_error=False)
         if result is not None:
             return result
         handler_filename = os.path.basename(handler_filepath)
         result_endpoint_prefix = AppConfig.setting(
             'RESULT_ENDPOINT_PREFIX')
         result_endpoint = '{}{}'.format(result_endpoint_prefix,
                                         handler_filename)
         new_datafetch_api_id = self.create_datafetch_api({
             'handler_filepath': handler_filepath,
             'result_endpoint': result_endpoint,
             'result_frequency_seconds': 30
         })
         Log.d('created datafetch api id {} for handler filepath "{}"',
               new_datafetch_api_id, handler_filepath)
     return self.__scalar_by_unique_col_value(table_name,
                                              col_name,
                                              handler_filepath,
                                              scalar_col_name,
                                              frame=datafetch_api_ids)
Example #14
def h5_to_plot(h5, from_epoch, to_epoch, filterInNth, agents, format_as_image):
    Log.d('============')
    Log.d(agents)
    agent_keys = [a for a in agents.split(',') if a]
    if len(agent_keys) == 0:
        return 'No agent selected'
    filterInNth = int(filterInNth)
    df_info = ''
    pd.options.display.float_format = '{:.2f}'.format
    df_info += '{}\n\n'.format(h5.info())
    for key in h5:
        where = 'index >= {} and index <= {}'.format(from_epoch, to_epoch)
        Log.d('where: {}', where)
        frame = pd.read_hdf(h5, key, where=where)
        if frame.empty:
            return 'Empty frame'
        df_info += '{}\n\n'.format(frame.describe())
        background_color = '#272822'
        minute_intervals = [
            12 * 60,  # 12 hours
        ]
        fig, ax = plt.subplots(figsize=(23, 12))  #figsize=(28,21))
        fig.patch.set_facecolor(background_color)
        Log.t('building plot')
        is_image_format = bool(int(format_as_image))

        def label_connect(path_collection, labels, color=None):
            tooltip = mpld3.plugins.PointHTMLTooltip(path_collection, [
                '<span class="point-tooltip" style="color: {}">{} <span class="point-tooltip-key">{}</span></span>'
                .format(color, l, key) for l in labels
            ],
                                                     voffset=100,
                                                     hoffset=0)
            mpld3.plugins.connect(fig, tooltip)

        for agent_key in agent_keys:
            try:
                agent_name = agent_key.split('(')[0]
                Log.d('plotting agent: {} -> {}', agent_key, agent_name)
                agent = agent_map[agent_name]
                plot_title = ''
                col_prefix = 'feature_{}_'.format(agent_key)
                agent_plot = agent.plot(plot_title,
                                        None,
                                        frame,
                                        ax,
                                        is_image_format,
                                        label_connect=label_connect,
                                        filter_in_nth=filterInNth,
                                        cp=col_prefix)
                pe.style_plot(ax, plot_title)
            except KeyError as ke:
                Log.w('Valid keys are: {}', frame.keys())
                raise ke
        plot_dirpath = AppConfig.setting('PLOT_DIRPATH')
        plot_filepath = os.path.join(plot_dirpath,
                                     '{}.png'.format('some plot'))

        fig.patch.set_facecolor(style.backgroundColor)
        fig.tight_layout()
        if is_image_format:
            sio = BytesIO()
            fig.savefig(sio,
                        facecolor=fig.get_facecolor(),
                        edgecolor='none',
                        format="png")
            html = '<img src="data:image/png;base64,{}"/>'.format(
                base64.encodebytes(sio.getvalue()).decode())
            return html
        mpld3.plugins.connect(fig, ZoomSizePlugin())
        return mpld3.fig_to_html(fig)
    raise RuntimeError('h5 store contained no keys')
Example #15

@version(1, 0, 0)
class ExecuteApp(App):
    def __init__(self, version=None):
        super().__init__(__file__)

    def run(self, h5_filepath):
        self.executor = Executor(h5_filepath=h5_filepath,
                                 initial_capital=1000,
                                 initial_coins=10).run_async().join()


if __name__ == '__main__':
    try:
        app = ExecuteApp()
        assert len(sys.argv) == 2, 'expected exactly one argument (the h5 filename)'
        h5_filename = sys.argv[1]
        h5_filepath = os.path.join(AppConfig.setting('PREDICTOR_DATA_DIRPATH'),
                                   h5_filename)
        assert Path(h5_filepath).is_file(), 'is not a file: {}'.format(
            h5_filepath)
        app.run(h5_filepath)
    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt\n')
    except Exception as e:
        Log.c('app failed: {}', e)
        stacktrace = OsExpert.stacktrace()
        Log.d('stacktrace:\n{}', stacktrace)
Example #16
 def initialize(appconfig_filepath, logconfig_filepath):
     OsExpert.ensure_abs_filepath_exists(appconfig_filepath)
     Log.initialize(logconfig_filepath)
     AppConfig.initialize(appconfig_filepath)
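A typical call site for this bootstrap helper, assuming it is exposed as a static method on the App base class; the class name and config file paths are assumptions:

App.initialize('app.config', 'logging.config')  # both paths are hypothetical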
Example #17
	def predictor_from_config_maybe(self):
		endpoint = AppConfig.setting('AWS_DNN_PREDICTOR_ENDPOINT')
		return TensorFlowPredictor(endpoint) if endpoint not in (None, '') else None
Example #18
 def __init__(self):
     self.host = AppConfig.setting('DB_HOST')
     self.user = AppConfig.setting('DB_USER')
     self.password = AppConfig.setting('DB_PASSWORD')
     self.db_name = AppConfig.setting('DB_NAME')
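These four settings presumably feed a connection call elsewhere in DatabaseGateway; a sketch using pymysql, where the driver choice is an assumption and only the four settings come from the source:

 def connect(self):
     # pymysql is assumed; any DB-API driver accepting these keyword arguments would do.
     import pymysql
     return pymysql.connect(host=self.host, user=self.user,
                            password=self.password, database=self.db_name)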
Example #19
import sys; sys.path.append('..')
import os
import asyncio
from applogging import Log
from core import AppConfig, OsExpert, Timeout
import traceback
import time

AppConfig.initialize_in_file_dir(
	OsExpert.path_backstep(__file__)
	)

class Parser():
	def __init__(self):
		wfPath = "/tmp/my_fifo2"
		wp = None
		try:
			if not os.path.exists(wfPath):
				os.mkfifo(wfPath)
			while True:
				is_sent = False
				try:
					with Timeout(1):
						with open(wfPath, 'w') as wp:
							print('sending..')
							wp.write("a write!\n")
						print('sent')
						is_sent = True
						time.sleep(1)
				except TimeoutError:
					if not is_sent:
						print('send timed out')  # assumed handling; the original example is truncated at this point
Example #20
def list():
    dirpath = AppConfig.setting('GENERATOR_DATA_DIRPATH')
    files = [f for f in os.scandir(dirpath) if f.name.endswith('.h5')]
    return render_template('files.html', style=style, files=files)
Example #21
import sys; sys.path.append('..')
import time
import configparser
import io
import os
import hashlib
import requests
import hmac
import websocket
import simplejson as json
from applogging import Log
from core import AppConfig, OsExpert
from db import DatabaseGateway

AppConfig.initialize_in_file_dir(OsExpert.path_backstep(__file__))
file_path = os.path.realpath(__file__)
print(file_path)
db = DatabaseGateway()
datafetch_api_id = db.datafetch_api_id_by_handler_filepath(file_path)
print(datafetch_api_id)
exit()


def prettyJson(jsonData):
    return json.dumps(jsonData, indent=4, sort_keys=True)


publicKey = AppConfig.setting("BITCOINAVERAGE_PUBLIC_KEY")
secretKey = AppConfig.setting("BITCOINAVERAGE_SECRET_KEY")
url = "https://apiv2.bitcoinaverage.com/websocket/get_ticket"
Example #22
class PredictApp(App):
    def __init__(self, version=None):
        super().__init__(__file__)

    def handle_change(self):
        Log.d('modified')

    def run(self, h5_filepath):
        predictor = Predictor(h5_filepath)
        thread = predictor.run_async()
        thread.join()


if __name__ == '__main__':
    try:
        app = PredictApp()
        assert len(sys.argv) == 2, 'expected exactly one argument (the h5 filename)'
        h5_filename = sys.argv[1]
        h5_filepath = os.path.join(AppConfig.setting('GENERATOR_DATA_DIRPATH'),
                                   h5_filename)
        assert Path(h5_filepath).is_file(), 'is not a file: {}'.format(
            h5_filepath)
        app.run(h5_filepath)
    except KeyboardInterrupt:
        print('\n\nKeyboardInterrupt\n')
    except Exception as e:
        Log.c('app failed: {}', e)
        stacktrace = OsExpert.stacktrace()
        Log.d('stacktrace:\n{}', stacktrace)