def whoIsAlive():
    client = DAL.openConnection()
    cursor = DAL.liveness_getAll(client)
    results = list(cursor)
    for d in results:
        d['last_notif_min'] = floor((dtNow() - d["timestamp"]).total_seconds() / 60)  # minutes since last notification
        d['last_notif_sec'] = floor((dtNow() - d["timestamp"]).total_seconds())  # seconds since last notification
        if d['name'] == "producer: news":
            d['status'] = d['last_notif_min'] < 25
        elif d['name'] == "producer: predictions":
            d['status'] = d['last_notif_min'] < 61
        elif d['name'] == "producer: telegram":
            d['status'] = d['last_notif_min'] < 60
        elif d['name'] == "worker: news":
            d['status'] = d['last_notif_min'] < 120
        elif d['name'] == "worker: sentiments news":
            d['status'] = d['last_notif_min'] < 120
        else:
            d['status'] = d['last_notif_min'] < 3
        del d['timestamp']
        del d['_id']
        if mailer and not d['status']:
            send_email_server("Module (" + d['name'] + ") is offline",
                              "last notif minutes: " + str(d['last_notif_min']))
    results = sorted(results, key=lambda x: x['name'])
    return results
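# The if/elif chain in whoIsAlive() could be table-driven, which keeps the per-module
# liveness thresholds in one place. A minimal sketch, reusing the exact names and minute
# thresholds from above (the helper itself is illustrative, not used elsewhere):
LIVENESS_THRESHOLDS_MIN = {
    "producer: news": 25,
    "producer: predictions": 61,
    "producer: telegram": 60,
    "worker: news": 120,
    "worker: sentiments news": 120,
}
DEFAULT_THRESHOLD_MIN = 3  # anything unlisted must report within 3 minutes

def isAlive(name, last_notif_min):
    # look up the per-module threshold, falling back to the strict default
    return last_notif_min < LIVENESS_THRESHOLDS_MIN.get(name, DEFAULT_THRESHOLD_MIN)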
def fillUpFromHistory():
    threads = []
    dtit = datetime.datetime.strptime('2018-01-01 00:00', '%Y-%m-%d %H:%M')
    dtnow = dtNow()
    #dtnow = datetime.datetime.strptime('2018-02-09 13:00', '%Y-%m-%d %H:%M')  # fixed end for backfills
    while dtit <= dtnow:
        print("")
        dtnow = dtNow()
        print("dtit: " + str(dtit))
        for y in mapping:
            th = binance(dtit, client, y['base'], y['quote'])
            th.start()
            threads.append(th)
            if len(threads) == 1:  # join in batches of one, i.e. effectively serial
                for t in threads:
                    try:
                        t.join(timeout=30)  # up to 30 sec per thread
                    except Exception as ex:
                        print(ex)
                threads = []
        time.sleep(1)
        #dtit += datetime.timedelta(seconds=20)
        dtit += datetime.timedelta(minutes=1000)
def markSiteAsBuilt(site, mayVal):
    if mayVal[1] == 0:
        DAL.insert_newsSite_lastBuilt(client, site)
        print(str(dtNow()) + " db:insert")
    else:
        DAL.update_newsSite_lastBuilt(client, mayVal[1])
        print(str(dtNow()) + " db:update")
def processSites(sitesarr):
    print(str(dtNow()) + " processSites start")
    pool = ThreadPool(len(sitesarr))  # one thread per site, processed in parallel
    pool.map(processSite, sitesarr)
    pool.close()
    pool.join()
    print(str(dtNow()) + " processSites end")
def processAll():
    print(str(dtNow()) + " processAll start")
    # N_SITES_PARALLEL = how many news sites to process in parallel
    divideWork(processSites, SITES_forumProducer, N_SITES_PARALLEL)
    print(str(dtNow()) + " processAll end")
    print()
def processArticles(articles):
    print(str(dtNow()) + " processArticles start")
    pool = ThreadPool(len(articles))  # one thread per article, processed in parallel
    pool.map(processArticle, articles)
    pool.close()
    pool.join()
    print(str(dtNow()) + " processArticles end")
def process():
    X_sleep = len(CRYPTO_facebookPages) * 1.40
    X_sleep = max(X_sleep, 60)
    # give it some slack in case something was posted just recently
    last_check = dtNow() - timedelta(seconds=X_sleep)
    CryptoMapping = list(CRYPTO_socialKeywords.items())
    while True:
        try:
            log.info("\tlast check: " + str(last_check))
            ret = graph.batch(post_arr)
            for entries in ret:
                try:
                    log.info(entries)
                    for obj in entries['data']:
                        body = ''
                        link = obj['link'] if 'link' in obj else ''
                        created_time = datetime.strptime(
                            obj['created_time'],
                            '%Y-%m-%dT%H:%M:%S%z').replace(tzinfo=None)  # UTC: OK
                        if created_time < last_check:
                            continue  # skip non-realtime mentions
                        if 'message' in obj:
                            body += obj['message']
                        if 'name' in obj:
                            body += "\n" + obj['name']
                        log.info("created: " + obj['created_time'])
                        log.info("relative: " + str(created_time))
                        log.info("")
                        sbody = nltk.wordpunct_tokenize(body.lower())
                        for crypto, kws in CryptoMapping:
                            for kw in kws:
                                if kw in sbody:
                                    log.info("sending to kafka: " + link)
                                    producerMgr.producer_send_mentionsSocial(
                                        body, 'facebook', link, crypto, producer)
                except Exception as ex:
                    logErr.critical(str(ex), exc_info=True)
            client = DAL.openConnection()
            DAL.liveness_IAmAlive(client, "producer: facebook")
        except Exception as ex:
            logErr.critical(str(ex), exc_info=True)
        log.info("\tlast check: " + str(last_check))
        log.info("X_sleep: " + str(X_sleep))
        last_check = dtNow()  # new last_check
        time.sleep(X_sleep)
def portfolioToImage(portfolio):
    # https://stackoverflow.com/questions/8409095/matplotlib-set-markers-for-individual-points-on-a-line
    plt.clf()  # clear any previous figure state
    fig, ax2 = plt.subplots()
    data = []
    for ts, obj in portfolio.items():
        if 'ap' in obj:
            data.append(obj['ap'])
    ax2.set_ylabel('price')
    ax2.plot(data, color='black', marker='o', alpha=0.8)
    ax2.xaxis.grid(True)  # grid lines at the x ticks
    fig.set_size_inches(len(data) * 0.3, 8, forward=True)  # width scales with number of points
    _path = '../ui/temp/images/' + str(dtNow()) + '_(0)__' + 'backtesting.png'
    plt.savefig(_path, bbox_inches='tight', dpi=100, format='png')  # bbox_inches='tight' auto-resizes
def insert_newsSite_lastBuilt(client, url):
    try:
        db = selectDB(client)
        db.newsbuilds.insert_one({'url': url, 'ts': dtNow()})
    except Exception as ex:
        logErr = createLogger("DAL", "DAL_error")
        logErr.critical(str(ex), exc_info=True)
def update_newsSite_lastBuilt(client, id):
    try:
        db = selectDB(client)
        # update_one() replaces the deprecated update(); same effect with a $set here
        db.newsbuilds.update_one({'_id': ObjectId(id)}, {'$set': {'ts': dtNow()}})
    except Exception as ex:
        logErr = createLogger("DAL", "DAL_error")
        logErr.critical(str(ex), exc_info=True)
def processSite(site):
    global DUP_TITLES
    DUP_TITLES = []  # clean slate for every site (module-level list shared with processArticle)
    print(str(dtNow()) + " processSite start")
    print(str(dtNow()) + " " + site)
    mayVal = mayProcessArticles(site)
    b = newspaper.build(site, memoize_articles=True)  # False disables the cache; keep True in production
    markSiteAsBuilt(site, mayVal)
    if mayVal[0]:
        # N_ARTICLES_PARALLEL = how many articles to process in parallel
        divideWork(processArticles, b.articles, N_ARTICLES_PARALLEL)
    else:
        print(str(dtNow()) + " skipping processArticles")
    print(str(dtNow()) + " processSite end")
def liveness_IAmAlive(client, name):
    try:
        db = selectDB(client)
        # replace_one(..., upsert=True) keeps the whole-document-replacement semantics
        # of the deprecated update() call
        db.liveness.replace_one({'name': name},
                                {'name': name, 'timestamp': dtNow()},
                                upsert=True)
    except Exception as ex:
        logErr = createLogger("DAL", "DAL_error")
        logErr.critical(str(ex), exc_info=True)
def persistMatch(body, title, url, crypto):
    source = getDomainFromString(url)
    print(str(dtNow()) + " found at source: " + source)
    print(url)
    print(title)
    print(body)
    print()
    print()
def mayProcessArticles(site):
    # Make sure the site was built less than an hour ago. If not, build it first so the
    # second iteration can be treated as real-time data; otherwise we may produce data
    # that is old and was not published within the current time window.
    lastb = DAL.check_when_newsSite_lastBuilt(client, site)
    if len(lastb) == 0:
        return [False, 0]  # new site, has not been built yet
    now = dtNow()
    lastcheck = lastb[0]['ts']
    ds = (now - lastcheck).total_seconds()
    print(str(dtNow()) + " last build: " + str(ds) + " sec ago.")
    if ds > 60 * 60:
        # more than an hour since the last build: build now, produce in the next iteration
        return [False, lastb[0]['_id']]
    else:
        return [True, lastb[0]['_id']]
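# A hypothetical walkthrough of the mayProcessArticles() contract as consumed by
# processSite() and markSiteAsBuilt() above (the site URL is illustrative):
def _demo_gating(site="https://news.example.com"):
    ok, build_id = mayProcessArticles(site)
    if build_id == 0:
        print("never built: insert a build record, produce next iteration")
    elif not ok:
        print("stale build (> 1 h old): refresh the timestamp, produce next iteration")
    else:
        print("fresh build: articles may be produced as real-time data")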
def producer_send_mentionsSocial(body, source, url, crypto, producer):
    ejs = json.dumps({
        'body': body,
        'source': source,
        'url': url,
        'crypto': crypto,
        'type': 'social',
    })
    producer.send(kafkaTopic_mentionsSocial, ejs.encode())
    print(dtNow().strftime("%Y-%m-%d %H:%M:%S") + " SENT:\t" + ejs)
    print()
def producer_send_mentionsNews(body, title, source, url, crypto, producer):
    ejs = json.dumps({
        'body': body,
        'title': title,
        'source': source,
        'url': url,
        'crypto': crypto,
        'type': 'news',
    })
    producer.send(kafkaTopic_mentionsNews, ejs.encode())
    print(dtNow().strftime("%Y-%m-%d %H:%M:%S") + " SENT:\t" + source)
    print()
def streamAll():
    producer = producerMgr.create_kafkaProducer()
    subreddits = list(CRYPTO_redditProducer_subreddits.values())  # get values
    subreddits = [item for items in subreddits for item in items]  # flatten
    querystring = "+".join(subreddits)
    log.info(querystring)
    CryptoMapping = list(CRYPTO_socialKeywords.items())
    while True:
        try:
            client = DAL.openConnection()
            alive_counter = dtNow()
            reddit = praw.Reddit(client_id='EzcegP77YYq7dg',
                                 client_secret="CwTogkSNVPGIJFiQdWyZF_Gqqr4",
                                 user_agent='USERAGENT')
            for comment in reddit.subreddit(querystring).stream.comments():
                if comment.body.find('Your submission has been flagged') == -1:
                    # construct a new 'body', since comments alone don't always say which crypto is discussed
                    body = comment.link_title + " | " + comment.body
                    sbody = nltk.wordpunct_tokenize(body.lower())
                    for crypto, kws in CryptoMapping:
                        for kw in kws:
                            if kw in sbody:
                                log.info("sending to kafka: " + comment.link_url)
                                producerMgr.producer_send_mentionsSocial(
                                    comment.body, 'reddit', comment.link_url, crypto, producer)
                                if (dtNow() - alive_counter).total_seconds() >= 15:
                                    DAL.liveness_IAmAlive(client, "producer: reddit")
                                    alive_counter = dtNow()
                                break  # one signal per crypto only
        except Exception as ex:
            logErr.critical(str(ex), exc_info=True)
        time.sleep(20)
def on_data(self, data):
    try:
        data = json.loads(data)
        if 'user' in data:
            body, url = '', ''
            if 'retweeted_status' not in data:
                body = data['text']
                url = ('https://twitter.com/' + data['user']['screen_name']
                       + '/status/' + data['id_str'])
            else:
                # this is a retweet, so extract the original tweet
                url = ('https://twitter.com/'
                       + data['retweeted_status']['user']['screen_name']
                       + '/status/' + data['retweeted_status']['id_str'])
                body = (data['retweeted_status']['extended_tweet']['full_text']
                        if data['retweeted_status']['truncated']
                        else data['retweeted_status']['text'])
            sbody = nltk.wordpunct_tokenize(body.lower())
            for crypto, kws in self.CryptoMapping:
                for kw in kws:
                    if kw in sbody:
                        log.info("sending to kafka: " + url)
                        producerMgr.producer_send_mentionsSocial(
                            body, 'twitter', url, crypto, producer)
                        if (dtNow() - self.alive_counter).total_seconds() >= 15:
                            DAL.liveness_IAmAlive(self.client, "producer: twitter")
                            self.alive_counter = dtNow()
                        break  # one signal per crypto only
        else:
            log.info(data)
    except Exception as ex:
        logErr.critical(str(ex), exc_info=True)
    return True
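# Why the producers tokenize with nltk.wordpunct_tokenize() before matching keywords:
# a raw substring test produces false positives (e.g. "eos" occurs inside "videos"),
# while a token-level test only matches the standalone word. A self-contained sketch
# with illustrative text:
import nltk

def _demo_token_match():
    body = "Watching videos all day."
    sbody = nltk.wordpunct_tokenize(body.lower())  # ['watching', 'videos', 'all', 'day', '.']
    assert 'eos' in body.lower()  # substring test: false positive via "videos"
    assert 'eos' not in sbody     # token test: no standalone "eos" token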
def processArticle(article):
    try:
        article.download()
        article.parse()
        # skip duplicate entries that share a headline but differ in url (url params etc.)
        if article.title not in DUP_TITLES:
            DUP_TITLES.append(article.title)
            for crypto, terms in CRYPTO_socialKeywords.items():
                if contains(article.title, terms) or contains(article.text, terms):
                    persistMatch(article.text, article.title, article.url, crypto)
    except Exception as ex:
        print(str(dtNow()) + " error processing article: " + str(ex))
def main(args=sys.argv):
    # By default this retrieves all data at the stored 1-minute resolution; it does not
    # aggregate. To aggregate above 1 min, pass the interval explicitly.
    client = DAL.openConnection()
    db = client.crypto

    if not len(args) >= 2:
        print("expected exchange parameter, e.g.: binance")
        sys.exit(0)
    exchange = args[1]
    if not len(args) >= 3:
        print("expected base currency parameter, e.g.: BTC")
        sys.exit(0)
    base_cur = args[2]
    if not len(args) >= 4:
        print("expected quote currency parameter, e.g.: USDT")
        sys.exit(0)
    quote_cur = args[3]
    if not len(args) >= 5:
        print("expected interval parameter, e.g.: 1 (=1 minute)")
        sys.exit(0)
    INTERVAL = int(args[4])
    if not len(args) >= 6:
        print("expected historymins parameter, e.g.: 60 (=60 minutes)")
        sys.exit(0)
    historymins = int(args[5])
    if not len(args) >= 7:
        print("expected currentDateTime parameter")
        sys.exit(0)
    currentDateTime = datetime.datetime.strptime(args[6], '%Y-%m-%dT%H:%M')
    if currentDateTime > dtNow():
        currentDateTime = dtNow().replace(second=0, microsecond=0)

    # create correct min and max according to total window size and intervals: [min, max[
    maxDateTimeExcluded = currentDateTime
    if INTERVAL > 1:
        # only retrieve complete intervals (not still-evolving data) -- prevents caching issues
        maxDateTimeExcluded = currentDateTime.replace(
            minute=currentDateTime.minute - (currentDateTime.minute % INTERVAL))
    minDateTimeIncluded = maxDateTimeExcluded - datetime.timedelta(minutes=historymins)

    if INTERVAL < 60:  # minute interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T' + \
                         str(e['_id']['hour']).zfill(2) + ':' + \
                         str(e['_id']['interval']).zfill(2)

        queryinterval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
            'hour': {'$hour': '$timestamp'},
            'interval': {
                # e.g. INTERVAL=15 creates the buckets [0-15[ ; [15-30[ ; [30-45[ ; [45-60[
                '$subtract': [
                    {'$minute': '$timestamp'},
                    {'$mod': [{'$minute': '$timestamp'}, INTERVAL]}
                ]
            }
        }
    elif 60 <= INTERVAL < 1440:  # hour interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T' + \
                         str(e['_id']['interval']).zfill(2) + ':00'

        queryinterval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
            'interval': {
                '$subtract': [
                    {'$hour': '$timestamp'},
                    {'$mod': [{'$hour': '$timestamp'}, int(INTERVAL / 60)]}
                ]
            }
        }
    else:  # day interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T00:00'

        queryinterval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
        }

    pipeline = [
        {'$match': {
            'base_cur': base_cur,
            'quote_cur': quote_cur,
            'exchange': exchange,
            'timestamp': {
                '$gte': minDateTimeIncluded,
                '$lt': maxDateTimeExcluded,
            }
        }},
        {'$group': {
            '_id': queryinterval,
            'low': {'$min': '$data.low'},
            'high': {'$max': '$data.high'},
            'open': {'$first': '$$ROOT.data.open'},
            'close': {'$last': '$$ROOT.data.close'},
            'volume': {'$sum': '$data.volume'},
            'trades': {'$sum': '$data.trades'},
            'count': {'$sum': 1},
        }}
    ]
    cursor = db.get_collection('exchanges').aggregate(pipeline)

    FINAL = []
    for e in cursor:
        if e['count'] < INTERVAL:
            # incomplete candlestick (if aggregated): historymins too short, or missing data
            continue
        adjust_func(e)
        e['label_dt'] = datetime.datetime.strptime(e['label'], '%Y-%m-%dT%H:%M')
        e['label_to'] = datetime.datetime.strftime(
            e['label_dt'] + datetime.timedelta(minutes=INTERVAL), '%Y-%m-%dT%H:%M')
        del e['_id']
        FINAL.append(e)
    FINAL = sorted(FINAL, key=lambda x: x['label_dt'])
    for e in FINAL:
        e.pop('label_dt', None)
    return FINAL
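# The '$subtract'/'$mod' pair in the pipeline above is the usual flooring trick:
# bucket_start = unit - (unit % INTERVAL). A quick pure-Python sketch of the same
# arithmetic Mongo evaluates per document (values are illustrative):
def _demo_bucket(minute, interval=15):
    # floor the minute down to the start of its interval bucket
    return minute - (minute % interval)

assert [_demo_bucket(m) for m in (0, 7, 15, 29, 44, 59)] == [0, 0, 15, 15, 30, 45]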
        'quote_cur': 'USDT'
    },
]

# testing configuration
seq_pred_len = 1
predict_n_intervals_arr = [12]
n_windows = [32]
n_neurons = [2]
n_hiddenlayers = [1]
n_epochs = [1000]
intervals = [int(args[1])]
n_batch_sizes = [512]

datasets = makeDatasets()
_dtnow = dtNow()

pmanager = multiprocessing.Manager()
sync_dict_json = pmanager.dict()
sync_list_output = pmanager.list()
arrParams = {}
threads = []

HH_max = 1
dtstart = dtNow()
#dtstart = datetime.strptime('2018-04-07 15:00', '%Y-%m-%d %H:%M')
#HH_max = 20  # --> dtstart + (i in HH_max) * interval

train_predict()
def main(args=sys.argv):
    client = DAL.openConnection()
    db = client.crypto

    if not len(args) >= 2:
        print("expected crypto parameter, e.g. BTC [interval mins]")
        sys.exit(0)

    INTERVAL_GRAPH_mentionsBasic = 60  # 60-minute default
    if len(args) >= 3:
        INTERVAL_GRAPH_mentionsBasic = int(args[2])

    currentDateTime = dtNow().replace(second=0, microsecond=0)
    if len(args) >= 5:
        # in the future the user may send a datetime from another tz; use dtLocal()
        currentDateTime = datetime.datetime.strptime(args[4], '%Y-%m-%dT%H:%M')
        if currentDateTime > dtNow():
            currentDateTime = dtNow().replace(second=0, microsecond=0)

    # create correct min and max according to total window size and intervals: [min, max[
    maxDateTimeExcluded = currentDateTime
    if INTERVAL_GRAPH_mentionsBasic > 1:
        maxDateTimeExcluded = currentDateTime.replace(
            minute=currentDateTime.minute -
            (currentDateTime.minute % INTERVAL_GRAPH_mentionsBasic))

    WINDOW = 1440
    if len(args) >= 4:  # value in minutes
        WINDOW = int(args[3])
    minDateTimeIncluded = maxDateTimeExcluded - datetime.timedelta(minutes=WINDOW)

    if INTERVAL_GRAPH_mentionsBasic < 60:  # minute interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T' + \
                         str(e['_id']['hour']).zfill(2) + ':' + \
                         str(e['_id']['interval']).zfill(2)

        interval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
            'hour': {'$hour': '$timestamp'},
            'interval': {
                # e.g. 15-minute intervals: [0-15[ ; [15-30[ ; [30-45[ ; [45-60[
                '$subtract': [
                    {'$minute': '$timestamp'},
                    {'$mod': [{'$minute': '$timestamp'}, INTERVAL_GRAPH_mentionsBasic]}
                ]
            }
        }
    elif 60 <= INTERVAL_GRAPH_mentionsBasic < 1440:  # hour interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T' + \
                         str(e['_id']['interval']).zfill(2) + ':00'

        interval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
            'interval': {
                '$subtract': [
                    {'$hour': '$timestamp'},
                    {'$mod': [{'$hour': '$timestamp'},
                              int(INTERVAL_GRAPH_mentionsBasic / 60)]}
                ]
            }
        }
    else:  # day interval
        def adjust_func(e):
            e['label'] = str(e['_id']['year']).zfill(4) + '-' + \
                         str(e['_id']['month']).zfill(2) + '-' + \
                         str(e['_id']['day']).zfill(2) + 'T00:00'

        interval = {
            'year': {'$year': '$timestamp'},
            'month': {'$month': '$timestamp'},
            'day': {'$dayOfMonth': '$timestamp'},
        }

    pipeline = [
        {'$match': {
            'fromSymbol': {'$eq': args[1]},
            'timestamp': {
                '$gte': minDateTimeIncluded,
                '$lt': maxDateTimeExcluded,
            }
        }},
        {'$group': {
            '_id': interval,
            'avg': {'$avg': '$fromVol24_sum'},
        }}
    ]
    cursor = db.get_collection('volumes').aggregate(pipeline)
    result = list(cursor)

    FINAL = []
    # pre-process: label every bucket and drop incomplete edge intervals
    for e in result:
        adjust_func(e)
        e['label_dt'] = datetime.datetime.strptime(e['label'], '%Y-%m-%dT%H:%M')
        if e['label_dt'] < minDateTimeIncluded or \
                (e['label_dt'] + datetime.timedelta(
                    minutes=INTERVAL_GRAPH_mentionsBasic)) > maxDateTimeExcluded:
            continue
        e['start'] = str(e['label_dt'])
        e['end'] = str(e['label_dt'] +
                       datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic))
        e['label'] = datetime.datetime.strftime(e['label_dt'], '%Y-%m-%dT%H:%M')
        e.pop('_id', None)
        FINAL.append(e)

    # add missing intervals; start from the smallest bucket present, not just minDateTimeIncluded
    tmp_datetime = min([x['label_dt'] for x in FINAL])
    while (tmp_datetime + datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic)
           < maxDateTimeExcluded):
        contains = False
        for e in FINAL:
            if e['label_dt'] == tmp_datetime:
                contains = True
                break
        if not contains:
            e_tmp = copy.copy(FINAL[0])
            e_tmp['label_dt'] = tmp_datetime
            e_tmp['start'] = str(e_tmp['label_dt'])
            e_tmp['end'] = str(e_tmp['label_dt'] +
                               datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic))
            e_tmp['label'] = datetime.datetime.strftime(tmp_datetime, '%Y-%m-%dT%H:%M')
            e_tmp['avg'] = None
            e_tmp['avg_delta'] = None
            FINAL.append(e_tmp)
        tmp_datetime = tmp_datetime + datetime.timedelta(
            minutes=INTERVAL_GRAPH_mentionsBasic)

    # sort list:
    sorted_list = sorted(FINAL, key=lambda x: x['label_dt'])

    # post-process: round averages and compute the delta against the previous bucket
    FINAL = []
    prev_avg = None
    for e in sorted_list:
        if e['avg'] is not None:
            e['avg'] = round(e['avg'], 2)
            if prev_avg is None:
                e['avg_delta'] = 0
            else:
                e['avg_delta'] = round(e['avg'] - prev_avg, 2)
            prev_avg = e['avg']
        del e['label_dt']
        FINAL.append(e)
    return FINAL
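# A hypothetical invocation of the aggregation above, assuming this module is saved as
# volumes_graph.py (the file name is illustrative; argument order per the parsing above:
# crypto symbol, interval in minutes, window in minutes):
#
#   python volumes_graph.py BTC 60 1440
#
# or, called in-process:
#
#   rows = main(['volumes_graph.py', 'BTC', '60', '1440'])
#   # each row: {'label', 'start', 'end', 'avg', 'avg_delta'}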
def train_predict(args=sys.argv):
    # We need to generate every possible combination of our configuration, so pre-process it:
    # build parameter tuples and store them in an array that is consumed by a pool of processes.
    # We don't want to parallelize every combination at once; we cap the number of concurrent
    # processes (roughly 6 to 9). That is why the deepest level has a "uid" acting as a
    # separator: with many combinations (e.g. different epoch and neuron counts), unbounded
    # processes would run too long or crash the server through process count and memory use.
    for HH in range(HH_max):
        for exchange in sorted(exchanges):
            for symbol in sorted(symbols, key=lambda x: x['base_cur']):
                for featuresID, dataset_func in datasets.items():
                    for n_window in n_windows:
                        for interval in intervals:
                            for n_epoch in n_epochs:
                                for n_neuron in n_neurons:
                                    for n_hiddenlay in n_hiddenlayers:
                                        for n_batch_size in n_batch_sizes:
                                            for predict_n_intervals in predict_n_intervals_arr:
                                                h5fn = (h5Dir + 'predictions_v1'
                                                        + ' base_cur=' + symbol['base_cur']
                                                        + ' quote_cur=' + symbol['quote_cur']
                                                        + ' fid=' + featuresID
                                                        + ' interval=' + str(interval)
                                                        + ' n_window=' + str(n_window)
                                                        + ' n_epoch=' + str(n_epoch)
                                                        + ' n_batch_size=' + str(n_batch_size)
                                                        + ' n_neuron=' + str(n_neuron)
                                                        + ' predict_n_intervals=' + str(predict_n_intervals)
                                                        + ' n_hiddenlay=' + str(n_hiddenlay))
                                                _dtime = adjustDatetime_realtime(
                                                    interval,
                                                    dtstart + timedelta(minutes=HH * interval))
                                                # key used to parallelize processing
                                                uid = symbol['base_cur']  # +"_"+symbol['quote_cur']+"_"+str(n_neuron)+"_"+str(n_window)
                                                if uid not in arrParams:
                                                    arrParams[uid] = []
                                                arrParams[uid].append(
                                                    (h5fn, featuresID, exchange, symbol, n_window,
                                                     interval, _dtime, predict_n_intervals,
                                                     n_neuron, n_hiddenlay, n_epoch, n_batch_size,
                                                     dataset_func, sync_dict_json,
                                                     sync_list_output, seq_pred_len))

    # Now that we have our array of jobs/tasks, create a processing pool per uid and
    # execute all jobs accordingly.
    tasks = {}
    pools = {}
    for idf, arr in arrParams.items():
        tasks[idf] = []
        if idf not in pools:
            pools[idf] = multiprocessing.Pool(1)
        for tup in arr:
            tasks[idf].append(pools[idf].apply_async(fitAndPredict_trainAlways, tup))

    client = DAL.openConnection()
    DAL.liveness_IAmAlive(client, "producer: predictions")

    for idf, arr in tasks.items():
        for task in arr:
            try:
                task.get(timeout=60 * 20)
            except KeyboardInterrupt:
                raise
            except Exception:
                traceback.print_exc()
        pools[idf].close()

    for sendobj in sync_list_output:
        DAL.store_predictions_v1(client, sendobj)

    print("/performance/")
    print("started:")
    print(_dtnow)
    print("ended:")
    print(dtNow())
    print("/exited/")
    print("")

    log = createLogger("predictions_v1_info", "predictions_v1_info")
    log.info("/performance/")
    log.info("started:")
    log.info(str(_dtnow))
    log.info("ended:")
    log.info(str(dtNow()))
    log.info("/exited/")
    log.info("")
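# The pool-per-uid fan-out used by train_predict(), in miniature: each uid gets its own
# single-worker Pool, so jobs sharing a uid run serially while distinct uids run in
# parallel, capping total concurrency at the number of uids. A self-contained sketch;
# job() and the sample parameters are illustrative, not part of the pipeline:
import multiprocessing

def job(uid, n):
    return uid, n * n

if __name__ == '__main__':
    arrParams = {'BTC': [('BTC', 1), ('BTC', 2)], 'ETH': [('ETH', 3)]}
    pools, tasks = {}, []
    for uid, arr in arrParams.items():
        pools[uid] = multiprocessing.Pool(1)  # one worker per uid
        tasks += [pools[uid].apply_async(job, tup) for tup in arr]
    print([t.get(timeout=60) for t in tasks])  # [('BTC', 1), ('BTC', 4), ('ETH', 9)]
    for p in pools.values():
        p.close()
        p.join()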
client = DAL.openConnection()
db = client.crypto

if not len(sys.argv) >= 3:  # both parameters are required
    print("expected exchange and symbol parameters, e.g. binance BTCUSDT")
    sys.exit(0)
exchange = sys.argv[1]
symbol = sys.argv[2]

INTERVAL = 30
if len(sys.argv) >= 4:
    INTERVAL = int(sys.argv[3])

currentDateTime = dtNow().replace(second=0, microsecond=0)
if len(sys.argv) >= 6:
    # in the future the user may send a datetime from another tz; use dtLocal()
    currentDateTime = datetime.datetime.strptime(sys.argv[5], '%Y-%m-%dT%H:%M')
    if currentDateTime > dtNow():
        currentDateTime = dtNow().replace(second=0, microsecond=0)

maxDateTimeExcluded = currentDateTime
if INTERVAL > 1:  # and INTERVAL <= 60
    maxDateTimeExcluded = currentDateTime.replace(
        minute=currentDateTime.minute - (currentDateTime.minute % INTERVAL))

WINDOW = 1440
if len(sys.argv) >= 5:  # value in minutes
    WINDOW = int(sys.argv[4])
def __init__(self):
    self.CryptoMapping = list(CRYPTO_socialKeywords.items())
    self.client = DAL.openConnection()
    self.alive_counter = dtNow()