Example #1
def whoIsAlive():
	#print(dtNow())
	client = DAL.openConnection()
	cursor = DAL.liveness_getAll(client)
	results = list(cursor)
	for d in results:
		d['last_notif_min'] = floor((dtNow() - d["timestamp"]).total_seconds()/60) # minutes since last notification received
		d['last_notif_sec'] = floor((dtNow() - d["timestamp"]).total_seconds()) # seconds since last notification received
		if d['name'] == "producer: news":
			d['status'] = d['last_notif_min'] < 25
		elif d['name'] == "producer: predictions":
			d['status'] = d['last_notif_min'] < 61
		elif d['name'] == "producer: telegram":
			d['status'] = d['last_notif_min'] < 60

		elif d['name'] == "worker: news":
			d['status'] = d['last_notif_min'] < 120
		elif d['name'] == "worker: sentiments news":
			d['status'] = d['last_notif_min'] < 120

		else:
			d['status'] = d['last_notif_min'] < 3
		del d['timestamp']
		del d['_id']

		if mailer and not d['status']:
			send_email_server("Module ("+d['name']+") is offline", "last notif minutes: " + str(d['last_notif_min']))

	results = sorted(results, key=lambda x: x['name'])
	return results
Example #2
def fillUpFromHistory():

    threads = []
    dtit = datetime.datetime.strptime('2018-01-01 00:00', '%Y-%m-%d %H:%M')
    dtnow=dtNow()
    #dtnow=datetime.datetime.strptime('2018-02-09 13:00', '%Y-%m-%d %H:%M')

    while dtit <= dtnow:
        print("")
        dtnow=dtNow()
        print("dtit: " + str(dtit))
        #print("dtnow: " + str(dtnow))
        
        for y in mapping:

            th = binance(dtit, client,  y['base'], y['quote'])
            th.start()
            threads.append(th)
            if len(threads) == 1:
                for t in threads:
                    try:
                        t.join(timeout=30) # wait up to 30 seconds per fetch
                    except Exception as ex:
                        print(ex)
                threads=[]
                time.sleep(1)

        #dtit += datetime.timedelta(seconds=20)
        dtit += datetime.timedelta(minutes=1000)
Example #3
def markSiteAsBuilt(site, mayVal):
    if mayVal[1] == 0:
        DAL.insert_newsSite_lastBuilt(client, site)
        print(str(dtNow()) + "		" + " db:insert")
    else:
        DAL.update_newsSite_lastBuilt(client, mayVal[1])
        print(str(dtNow()) + "		" + " db:update")
Example #4
def processSites(sitesarr):
    print(str(dtNow()) + "	" + " processSites start")
    pool = ThreadPool(
        len(sitesarr))  # pool of X threads to process sites in parallel
    pool.map(processSite, sitesarr)
    pool.close()
    pool.join()
    print(str(dtNow()) + "	" + " processSites end")
Example #5
def processAll():
    print(str(dtNow()) + " processAll start")

    divideWork(processSites, SITES_forumProducer,
               N_SITES_PARALLEL)  # how many news sites to process in parallel

    print(str(dtNow()) + " processAll end")
    print()
Example #6
def processArticles(articles):
    print(str(dtNow()) + "			" + " processArticles start")
    pool = ThreadPool(
        len(articles))  # pool of X threads to process articles in parallel
    pool.map(processArticle, articles)
    pool.close()
    pool.join()
    print(str(dtNow()) + "			" + " processArticles end")
Example #7
def process():
    X_sleep = len(CRYPTO_facebookPages) * 1.40
    X_sleep = max(X_sleep, 60)
    last_check = dtNow() - timedelta(
        seconds=X_sleep
    )  # give it some slack in case something was posted just recently
    CryptoMapping = list(CRYPTO_socialKeywords.items())
    while True:
        try:
            log.info("\tlast check: " + str(last_check))
            ret = graph.batch(post_arr)
            for entries in ret:
                try:
                    log.info(entries)
                    for obj in entries['data']:
                        body = ''
                        link = obj['link'] if 'link' in obj else ''
                        created_time = datetime.strptime(
                            obj['created_time'],
                            '%Y-%m-%dT%H:%M:%S%z').replace(
                                tzinfo=None)  # UTC : OK
                        #log.info(created_time)
                        if created_time < last_check:
                            continue  # skip non-realtime mentions

                        if 'message' in obj:
                            body += obj['message']
                        if 'name' in obj:
                            body += "\n" + obj['name']

                        log.info("created: " + obj['created_time'])
                        log.info("relative: " + str(created_time))
                        log.info("")
                        sbody = nltk.wordpunct_tokenize(body.lower())
                        for crypto, kws in CryptoMapping:
                            for kw in kws:
                                if kw in sbody:
                                    log.info("sending to kafka: " + link)
                                    producerMgr.producer_send_mentionsSocial(
                                        body, 'facebook', link, crypto,
                                        producer)
                        #pprint.plog.info(o)
                        #log.info("")
                except Exception as ex:
                    logErr.critical(str(ex), exc_info=True)
            client = DAL.openConnection()
            DAL.liveness_IAmAlive(client, "producer: facebook")
        except Exception as ex:
            logErr.critical(str(ex), exc_info=True)

        log.info("\tlast check: " + str(last_check))
        log.info("X_sleep: " + str(X_sleep))
        last_check = dtNow()  # new last_check
        time.sleep(X_sleep)
Example #8
def portfolioToImage(portfolio):
    # https://stackoverflow.com/questions/8409095/matplotlib-set-markers-for-individual-points-on-a-line

    plt.clf()  # clear all
    fig, ax2 = plt.subplots()

    data = []
    for ts, obj in portfolio.items():
        if 'ap' in obj:
            data.append(obj['ap'])

    ax2.set_ylabel('price')
    ax2.plot(data, color='black', marker='o', alpha=0.8)  # wild guesses

    #############
    #max_tick = len(xpolated[0][concat_graph:])
    #ax1.xaxis.set_ticks(np.arange(len(dataset[concat_graph:, 1]), max_tick, 1))
    #ax2.xaxis.set_ticks(np.arange(len(dataset[concat_graph:, 1]), max_tick, 1))

    #ax2.xaxis.set_ticks(np.arange(0, 53, 1))
    ax2.xaxis.grid(True)  # horiz.

    fig.set_size_inches((len(data) * 0.3), 8, forward=True)
    _path = '../ui/temp/images/' + str(dtNow()) + '_(0)__' + 'backtesting.png'

    plt.savefig(_path, bbox_inches='tight', dpi=100,
                format='png')  # auto-resize bbox_inches='tight'
Example #9
def insert_newsSite_lastBuilt(client, url):
	try:
		db = selectDB(client)
		db.newsbuilds.insert_one({'url':url,'ts':dtNow()})
	except Exception as ex:
		logErr = createLogger("DAL", "DAL_error")
		logErr.critical(str(ex), exc_info=True)
Example #10
def update_newsSite_lastBuilt(client, id):
	try:
		db = selectDB(client)
		db.newsbuilds.update({'_id':ObjectId(id)}, {'$set':{'ts':dtNow()}})
	except Exception as ex:
		logErr = createLogger("DAL", "DAL_error")
		logErr.critical(str(ex), exc_info=True)
Example #11
def processSite(site):
    global DUP_TITLES  # shared with processArticle(); reset it to get a clean slate for every site
    DUP_TITLES = []
    print(str(dtNow()) + "		" + " processSite start")
    print(str(dtNow()) + "		  " + site)
    mayVal = mayProcessArticles(site)
    b = newspaper.build(
        site,
        memoize_articles=True)  # False to disable cache ; True in production
    markSiteAsBuilt(site, mayVal)
    if mayVal[0]:
        divideWork(
            processArticles, b.articles,
            N_ARTICLES_PARALLEL)  # how many articles to process in parallel
    else:
        print(str(dtNow()) + "		" + " skipping processArticles")
    print(str(dtNow()) + "		" + " processSite end")
Example #12
def liveness_IAmAlive(client, name):
	try:
		db = selectDB(client)
		db.liveness.update({'name': name}, {'name': name, 'timestamp': dtNow()}, upsert=True)
	except Exception as ex:
		logErr = createLogger("DAL", "DAL_error")
		logErr.critical(str(ex), exc_info=True)
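Taken together with Example #1, this shows the liveness pattern: each module upserts a {name, timestamp} heartbeat, and whoIsAlive() flags a module offline once that heartbeat is older than a per-module threshold. A minimal, self-contained sketch of the check (the thresholds and names below are illustrative, not the project's configuration):

from datetime import datetime, timedelta
from math import floor

THRESHOLDS_MIN = {"producer: news": 25, "producer: predictions": 61}  # illustrative values

def is_alive(name, last_heartbeat, now=None):
    now = now or datetime.utcnow()
    minutes = floor((now - last_heartbeat).total_seconds() / 60)
    return minutes < THRESHOLDS_MIN.get(name, 3)  # 3-minute fallback, as in Example #1

now = datetime.utcnow()
assert is_alive("producer: news", now - timedelta(minutes=10), now) is True
assert is_alive("producer: news", now - timedelta(minutes=40), now) is False
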
Example #13
def persistMatch(body, title, url, crypto):
    source = getDomainFromString(url)
    print(str(dtNow()) + "				" + " found at source: " + source)
    print(url)
    print(title)
    print(body)
    print()
    print()
Example #14
def mayProcessArticles(site):
    # make sure the site was built less than an hour ago;
    # if not, we build it first, so that the next iteration is treated as real-time data.
    # otherwise we may produce data that is old and was not published in the current time window
    lastb = DAL.check_when_newsSite_lastBuilt(client, site)
    if len(lastb) == 0:
        return [False, 0]  # new site, has not been built yet
    now = dtNow()
    lastcheck = lastb[0]['ts']
    ds = (now - lastcheck).total_seconds()
    print(str(dtNow()) + "		" + " last build: " + str(ds) + " sec ago.")
    if ds > 60 * 60:
        return [
            False, lastb[0]['_id']
        ]  # more than an hour passed since last check, build now and produce in next iteration
    else:
        return [True, lastb[0]['_id']]
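The gate above reduces to a simple rule: produce from a site only if its article cache was (re)built within the last hour; a missing or stale build means "build now, produce in the next iteration". A minimal, self-contained sketch of that rule (illustrative names, not the project's API):

from datetime import datetime, timedelta

def may_process(last_built_at, now=None, max_age=timedelta(hours=1)):
    if last_built_at is None:  # new site, never built yet
        return False
    now = now or datetime.utcnow()
    return (now - last_built_at) <= max_age

now = datetime.utcnow()
assert may_process(now - timedelta(minutes=30), now) is True    # fresh build: safe to produce
assert may_process(now - timedelta(hours=2), now) is False      # stale build: rebuild first
assert may_process(None, now) is False                          # never built: build first
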
Example #15
def producer_send_mentionsSocial(body, source, url, crypto, producer):

    ejs = json.dumps({
        'body': body,
        'source': source,
        'url': url,
        'crypto': crypto,
        'type': 'social',
    })

    producer.send(kafkaTopic_mentionsSocial, ejs.encode())

    print(str(dtNow().strftime("%Y-%m-%d %H:%M:%S")) + " SENT:\t" + ejs)
    print()
Example #16
def producer_send_mentionsNews(body, title, source, url, crypto, producer):
    ejs = json.dumps({
        'body': body,
        'title': title,
        'source': source,
        'url': url,
        'crypto': crypto,
        'type': 'news',
    })

    producer.send(kafkaTopic_mentionsNews, ejs.encode())

    print(str(dtNow().strftime("%Y-%m-%d %H:%M:%S")) + " SENT:\t" + source)
    print()
Example #17
def streamAll():
    producer = producerMgr.create_kafkaProducer()
    subreddits = list(CRYPTO_redditProducer_subreddits.values())  # get values
    subreddits = [item for items in subreddits for item in items]  # flatten
    querystring = "+".join(subreddits)
    log.info(querystring)

    CryptoMapping = list(CRYPTO_socialKeywords.items())
    while True:
        try:
            client = DAL.openConnection()
            alive_counter = dtNow()
            reddit = praw.Reddit(client_id='EzcegP77YYq7dg',
                                 client_secret="CwTogkSNVPGIJFiQdWyZF_Gqqr4",
                                 user_agent='USERAGENT')
            for comment in reddit.subreddit(querystring).stream.comments():
                if 'Your submission has been flagged' not in comment.body:
                    body = comment.link_title + " | " + comment.body  # let's construct a new 'body' since comments don't always tell which crypto is discussed
                    sbody = nltk.wordpunct_tokenize(body.lower())
                    for crypto, kws in CryptoMapping:
                        for kw in kws:
                            if kw in sbody:
                                log.info("sending to kafka: " +
                                         comment.link_url)
                                producerMgr.producer_send_mentionsSocial(
                                    comment.body, 'reddit', comment.link_url,
                                    crypto, producer)
                                if (dtNow() -
                                        alive_counter).total_seconds() >= 15:
                                    DAL.liveness_IAmAlive(
                                        client, "producer: reddit")
                                    alive_counter = dtNow()
                                break  # one signal per crypto only
        except Exception as ex:
            logErr.critical(str(ex), exc_info=True)
        time.sleep(20)
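Examples #7, #17 and #18 all match keywords the same way: lower-case the text, tokenize it with nltk.wordpunct_tokenize, and test whole-token membership, so a short symbol like "btc" is not matched inside unrelated words. A minimal sketch with a made-up keyword mapping (CRYPTO_socialKeywords plays this role in the real code):

import nltk

keywords = {'BTC': ['btc', 'bitcoin'], 'ETH': ['eth', 'ethereum']}  # illustrative mapping
body = "Bitcoin just broke out | what do you think?"
tokens = nltk.wordpunct_tokenize(body.lower())
matches = [crypto for crypto, kws in keywords.items()
           if any(kw in tokens for kw in kws)]
assert matches == ['BTC']
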
Example #18
    def on_data(self, data):
        try:
            data = json.loads(data)
            if 'user' in data:
                body, url = '', ''
                if 'retweeted_status' not in data:
                    body = data['text']
                    url = 'https://twitter.com/' + data['user'][
                        'screen_name'] + '/status/' + data['id_str']
                else:  # this is a re-tweet, so let us extract the original tweet
                    url = 'https://twitter.com/' + data['retweeted_status'][
                        'user']['screen_name'] + '/status/' + data[
                            'retweeted_status']['id_str']
                    body = (
                        data['retweeted_status']['extended_tweet']['full_text']
                        if data['retweeted_status']['truncated'] else
                        data['retweeted_status']['text'])

                sbody = nltk.wordpunct_tokenize(body.lower())
                for crypto, kws in self.CryptoMapping:
                    for kw in kws:
                        if kw in sbody:
                            log.info("sending to kafka: " + url)
                            producerMgr.producer_send_mentionsSocial(
                                body, 'twitter', url, crypto, producer)
                            if (dtNow() -
                                    self.alive_counter).total_seconds() >= 15:
                                DAL.liveness_IAmAlive(self.client,
                                                      "producer: twitter")
                                self.alive_counter = dtNow()
                            break  # one signal per crypto only
            else:
                log.info(data)
        except Exception as ex:
            logErr.critical(str(ex), exc_info=True)
        return True
Example #19
def processArticle(article):
    try:

        article.download()
        article.parse()

        if article.title not in DUP_TITLES:  # skip duplicates that share a headline but differ only in URL (params etc.)
            DUP_TITLES.append(article.title)
            for crypto, terms in CRYPTO_socialKeywords.items():
                if contains(article.title, terms) or contains(article.text, terms):
                    persistMatch(article.text, article.title, article.url, crypto)
    except Exception:
        print(str(dtNow()) + "				" + "error processingArticle")
Example #20
def main(args = sys.argv):

    # by default this retrieves all the data and does not aggregate;
    # if you wish to aggregate above 1 min, do it manually.

    client = DAL.openConnection()
    db=client.crypto

    if not len(args) >= 2:
        print("expected exchange parameter, e.g.: binance")
        sys.exit(0)
    exchange = args[1]
    
    if not len(args) >= 3:
        print("expected base currency parameter, e.g.: BTC")
        sys.exit(0)
    base_cur = args[2]

    if not len(args) >= 4:
        print("expected quote currency parameter, e.g.: USDT")
        sys.exit(0)
    quote_cur = args[3]

    if not len(args) >= 5:
        print("expected interval parameter, e.g.: 1 (=1 minute)")
        sys.exit(0)
    INTERVAL = int(args[4])

    if not len(args) >= 6:
        print("expected historymins parameter, e.g.: 60 (=60 minutes)")
        sys.exit(0)
    historymins = int(args[5])

    if not len(args) >= 7:
        print("expected currentDateTime parameter")
        sys.exit(0)
    currentDateTime = datetime.datetime.strptime(args[6], '%Y-%m-%dT%H:%M')
    if currentDateTime > dtNow():
        currentDateTime = dtNow().replace(second=0,microsecond=0)


    # create correct min and max according to total window size and intervals: [min, max[
    maxDateTimeExcluded = currentDateTime
    if INTERVAL > 1: # make sure we only retrieve complete intervals (not still evolving data) -- to prevent caching issues
        maxDateTimeExcluded = currentDateTime.replace(minute=currentDateTime.minute-(currentDateTime.minute % INTERVAL))
    minDateTimeIncluded = maxDateTimeExcluded - datetime.timedelta(minutes=historymins)


    if INTERVAL < 60:
        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T' +
                          str(e['_id']['hour']).zfill(2) + ':' +
                          str(e['_id']['interval']).zfill(2))
        queryinterval = {
            'year': {'$year' : '$timestamp'},
            'month': {'$month' : '$timestamp'},
            'day': {'$dayOfMonth' : '$timestamp'},
            'hour': {'$hour' : '$timestamp'},
            'interval' : { # bucket into INTERVAL-minute intervals, e.g. 15: [0-15[ ; [15-30[ ; [30-45[ ; [45-60[
                '$subtract' : [ 
                    {'$minute' : '$timestamp'},
                    {'$mod':[{'$minute' : '$timestamp'}, INTERVAL]}
                ]
            }
        }
    elif INTERVAL >= 60 and INTERVAL < 1440: # hour interval
        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T' +
                          str(e['_id']['interval']).zfill(2) + ':00')
        queryinterval = {
            'year': {'$year' : '$timestamp'},
            'month': {'$month' : '$timestamp'},
            'day': {'$dayOfMonth' : '$timestamp'},
            'interval' : { 
                '$subtract' : [ 
                    {'$hour' : '$timestamp'},
                    {'$mod':[{'$hour' : '$timestamp'}, int(INTERVAL/60)]}
                ]
            }
        }
    else:
        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T00:00')
        queryinterval = {
            'year': {'$year' : '$timestamp'},
            'month': {'$month' : '$timestamp'},
            'day': {'$dayOfMonth' : '$timestamp'},
        }

    pipeline = [
        {'$match' : 
            {   'base_cur' : base_cur,
                'quote_cur' : quote_cur,
                'exchange':     exchange,
                'timestamp': {
                        '$gte': minDateTimeIncluded,
                        '$lt': maxDateTimeExcluded,
                    }
            }
        },
        {'$group' : 
            {   '_id' : {   },
                'low' : {'$min':'$data.low'},
                'high' : {'$max':'$data.high'},
                'open': {'$first':'$$ROOT.data.open'},
                'close': {'$last':'$$ROOT.data.close'},
                'volume':{'$sum':'$data.volume'},
                'trades':{'$sum':'$data.trades'},
                'count': {'$sum':1},
            }
        }
    ]
    pipeline[1]['$group']['_id'] = queryinterval

    cursor = db.get_collection('exchanges').aggregate(pipeline)
    FINAL = []
    for e in cursor:
        if e['count'] < INTERVAL: # make sure candlestick (if aggregated) is complete (if not, probably historymins too short, or missing data)
            continue
        adjust_func(e)
        e['label_dt'] = datetime.datetime.strptime(e['label'], '%Y-%m-%dT%H:%M')
        e['label_to'] = datetime.datetime.strftime(e['label_dt'] + datetime.timedelta(minutes=INTERVAL), '%Y-%m-%dT%H:%M')
        del e['_id']
        FINAL.append(e)


    FINAL = sorted(FINAL, key=lambda x: x['label_dt'])
    for e in FINAL:
       e.pop('label_dt', None)

    return FINAL
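The '$subtract'/'$mod' expression in the $group stage above floors each timestamp's minute (or hour) to the start of its INTERVAL-sized bucket. A minimal pure-Python sketch of that arithmetic, for the minute case (illustrative only):

from datetime import datetime

def bucket_minute(ts, interval):
    return ts.minute - (ts.minute % interval)

ts = datetime(2018, 2, 9, 13, 37)
assert bucket_minute(ts, 15) == 30   # 13:37 falls into the [13:30, 13:45[ bucket
assert bucket_minute(ts, 5) == 35    # 13:37 falls into the [13:35, 13:40[ bucket
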
Example #21
            'quote_cur': 'USDT'
        },
    ]  # testing
    seq_pred_len = 1
    predict_n_intervals_arr = [12]
    n_windows = [32]
    n_neurons = [2]
    n_hiddenlayers = [1]
    n_epochs = [1000]
    intervals = [
        int(args[1]),
    ]
    n_batch_sizes = [
        512,
    ]
    datasets = makeDatasets()
    _dtnow = dtNow()
    pmanager = multiprocessing.Manager()
    sync_dict_json = pmanager.dict()
    sync_list_output = pmanager.list()
    arrParams = {}
    threads = []

    HH_max = 1
    dtstart = dtNow()

    #dtstart = datetime.strptime('2018-04-07 15:00', '%Y-%m-%d %H:%M')
    #HH_max = 20 # --> dtstart + ( i in HH_max) * interval

    train_predict()
Example #22
def main(args=sys.argv):

    client = DAL.openConnection()
    db = client.crypto

    if not len(args) >= 2:
        print("expected crypto parameter, e.g. BTC [interval mins]")
        sys.exit(0)

    INTERVAL_GRAPH_mentionsBasic = 60  # 60 minutes default
    if len(args) >= 3:
        INTERVAL_GRAPH_mentionsBasic = int(args[2])

    currentDateTime = dtNow().replace(second=0, microsecond=0)
    if len(args) >= 5:
        currentDateTime = datetime.datetime.strptime(
            args[4], '%Y-%m-%dT%H:%M'
        )  # in future the user may send datetime from another tz, use dtLocal()
        if currentDateTime > dtNow():
            currentDateTime = dtNow().replace(second=0, microsecond=0)

    # create correct min and max according to total window size and intervals: [min, max[
    maxDateTimeExcluded = currentDateTime
    if INTERVAL_GRAPH_mentionsBasic > 1:
        maxDateTimeExcluded = currentDateTime.replace(
            minute=currentDateTime.minute -
            (currentDateTime.minute % INTERVAL_GRAPH_mentionsBasic))

    WINDOW = 1440
    if len(args) >= 4:  # value in minutes
        WINDOW = int(args[3])
    minDateTimeIncluded = maxDateTimeExcluded - datetime.timedelta(
        minutes=WINDOW)

    if INTERVAL_GRAPH_mentionsBasic < 60:

        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T' +
                          str(e['_id']['hour']).zfill(2) + ':' +
                          str(e['_id']['interval']).zfill(2))

        interval = {
            'year': {
                '$year': '$timestamp'
            },
            'month': {
                '$month': '$timestamp'
            },
            'day': {
                '$dayOfMonth': '$timestamp'
            },
            'hour': {
                '$hour': '$timestamp'
            },
            'interval':
            {  # bucket into INTERVAL_GRAPH_mentionsBasic-minute intervals, e.g. 15: [0-15[ ; [15-30[ ; [30-45[ ; [45-60[
                '$subtract': [{
                    '$minute': '$timestamp'
                }, {
                    '$mod': [{
                        '$minute': '$timestamp'
                    }, INTERVAL_GRAPH_mentionsBasic]
                }]
            }
        }
    elif INTERVAL_GRAPH_mentionsBasic >= 60 and INTERVAL_GRAPH_mentionsBasic < 1440:  # hour interval

        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T' +
                          str(e['_id']['interval']).zfill(2) + ':00')

        interval = {
            'year': {
                '$year': '$timestamp'
            },
            'month': {
                '$month': '$timestamp'
            },
            'day': {
                '$dayOfMonth': '$timestamp'
            },
            'interval': {
                '$subtract': [{
                    '$hour': '$timestamp'
                }, {
                    '$mod': [{
                        '$hour': '$timestamp'
                    },
                             int(INTERVAL_GRAPH_mentionsBasic / 60)]
                }]
            }
        }
    else:

        def adjust_func(e):
            e['label'] = (str(e['_id']['year']).zfill(4) + '-' +
                          str(e['_id']['month']).zfill(2) + '-' +
                          str(e['_id']['day']).zfill(2) + 'T00:00')

        interval = {
            'year': {
                '$year': '$timestamp'
            },
            'month': {
                '$month': '$timestamp'
            },
            'day': {
                '$dayOfMonth': '$timestamp'
            },
        }

    pipeline = [{
        '$match': {
            'fromSymbol': {
                '$eq': args[1]
            },
            'timestamp': {
                '$gte': minDateTimeIncluded,
                '$lt': maxDateTimeExcluded,
            }
        }
    }, {
        '$group': {
            '_id': {},
            'avg': {
                '$avg': '$fromVol24_sum'
            },
        }
    }]
    pipeline[1]['$group']['_id'] = interval
    #print(pipeline)

    cursor = db.get_collection('volumes').aggregate(pipeline)
    result = list(cursor)
    FINAL = []

    # pre-process:
    for e in result:
        adjust_func(e)
        e['label_dt'] = datetime.datetime.strptime(e['label'],
                                                   '%Y-%m-%dT%H:%M')

        if e['label_dt'] < minDateTimeIncluded or (
                e['label_dt'] +
                datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic)
        ) > maxDateTimeExcluded:
            continue

        e['start'] = str(e['label_dt'])
        e['end'] = str(e['label_dt'] + datetime.timedelta(
            minutes=INTERVAL_GRAPH_mentionsBasic))
        e['label'] = str(
            datetime.datetime.strftime(e['label_dt'], '%Y-%m-%dT%H:%M')
        )  # + datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic)
        e.pop('_id', None)
        FINAL.append(e)

    # add missing intervals
    tmp_datetime = min([
        x['label_dt'] for x in FINAL
    ])  # we need smallest interval, not just minDateTimeIncluded
    while (tmp_datetime +
           datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic) <
           maxDateTimeExcluded):
        contains = False
        for e in FINAL:
            if e['label_dt'] == tmp_datetime:
                contains = True
                break
        if not contains:
            e_tmp = copy.copy(FINAL[0])
            e_tmp['label_dt'] = tmp_datetime
            e_tmp['start'] = str(e_tmp['label_dt'])
            e_tmp['end'] = str(e_tmp['label_dt'] + datetime.timedelta(
                minutes=INTERVAL_GRAPH_mentionsBasic))
            e_tmp['label'] = str(
                datetime.datetime.strftime(tmp_datetime, '%Y-%m-%dT%H:%M')
            )  # + datetime.timedelta(minutes=INTERVAL_GRAPH_mentionsBasic)
            e_tmp['avg'] = None
            e_tmp['avg_delta'] = None
            FINAL.append(e_tmp)
        tmp_datetime = tmp_datetime + datetime.timedelta(
            minutes=INTERVAL_GRAPH_mentionsBasic)

    # sort list :
    sorted_list = sorted(FINAL, key=(lambda x: (x['label_dt'])))
    FINAL = []

    # post-process:
    prev_avg = None
    for e in sorted_list:
        if e['avg'] is not None:
            e['avg'] = round(e['avg'], 2)
            if prev_avg is None:
                e['avg_delta'] = 0
            else:
                e['avg_delta'] = round(e['avg'] - prev_avg, 2)
            prev_avg = e['avg']

        del e['label_dt']
        FINAL.append(e)

    return FINAL
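The "add missing intervals" step above walks the expected bucket timestamps and inserts a placeholder entry (avg = None) wherever the aggregation returned nothing, so the caller gets a gap-free series. A minimal, self-contained sketch of that idea with made-up data:

from datetime import datetime, timedelta

interval = timedelta(minutes=60)
buckets = {
    datetime(2018, 1, 1, 0, 0): 10.0,
    datetime(2018, 1, 1, 2, 0): 12.5,   # the 01:00 bucket is missing
}
start, end = min(buckets), max(buckets) + interval
filled, t = [], start
while t < end:
    filled.append({'label_dt': t, 'avg': buckets.get(t)})  # None marks a gap
    t += interval
assert [e['avg'] for e in filled] == [10.0, None, 12.5]
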
Example #23
def train_predict(args=sys.argv):

    # we need to generate every possible combination of our configuration, so we pre-process it:
    # we build parameter tuples and store them in an array, and that array is then processed
    # in a multi-processing fashion. We don't want to parallelize every possible combination at
    # once; instead we want at most about 6 to 9 processes running at the same time, which is
    # why the deepest level uses a "uid" that acts as a separator (grouping key).

    # this is an important part: if you have many different combinations to try out (e.g. different
    # epochs and neuron counts), you want to make sure the processes don't take too long or crash
    # the server due to too many processes (or memory consumption).

    for HH in range(HH_max):
        for exchange in sorted(exchanges):
            for symbol in sorted(symbols, key=lambda x: x['base_cur']):
                for featuresID, dataset_func in datasets.items():
                    for n_window in n_windows:
                        for interval in intervals:
                            for n_epoch in n_epochs:
                                for n_neuron in n_neurons:
                                    for n_hiddenlay in n_hiddenlayers:
                                        for n_batch_size in n_batch_sizes:
                                            for predict_n_intervals in predict_n_intervals_arr:

                                                h5fn = (h5Dir + 'predictions_v1' +
                                                        ' base_cur=' + symbol['base_cur'] +
                                                        ' quote_cur=' + symbol['quote_cur'] +
                                                        ' fid=' + featuresID +
                                                        ' interval=' + str(interval) +
                                                        ' n_window=' + str(n_window) +
                                                        ' n_epoch=' + str(n_epoch) +
                                                        ' n_batch_size=' + str(n_batch_size) +
                                                        ' n_neuron=' + str(n_neuron) +
                                                        ' predict_n_intervals=' + str(predict_n_intervals) +
                                                        ' n_hiddenlay=' + str(n_hiddenlay))
                                                _dtime = adjustDatetime_realtime(
                                                    interval,
                                                    dtstart + timedelta(
                                                        minutes=HH * interval))

                                                uid = symbol[
                                                    'base_cur']  #+"_"+symbol['quote_cur']+"_"+str(n_neuron)+"_"+str(n_window) # way to parallellize processing
                                                if uid not in arrParams:
                                                    arrParams[uid] = []
                                                arrParams[uid].append(
                                                    (h5fn, featuresID,
                                                     exchange, symbol,
                                                     n_window, interval,
                                                     _dtime,
                                                     predict_n_intervals,
                                                     n_neuron, n_hiddenlay,
                                                     n_epoch, n_batch_size,
                                                     dataset_func,
                                                     sync_dict_json,
                                                     sync_list_output,
                                                     seq_pred_len))

    # now that we have our magical array of jobs/tasks,
    # let's create a processing pool and execute all jobs accordingly.

    tasks = {}
    pools = {}
    for idf, arr in arrParams.items():
        tasks[idf] = []
        if idf not in pools:
            pools[idf] = multiprocessing.Pool(1)
        for tup in arr:
            tasks[idf].append(pools[idf].apply_async(fitAndPredict_trainAlways,
                                                     tup))

    client = DAL.openConnection()
    DAL.liveness_IAmAlive(client, "producer: predictions")

    for idf, arr in tasks.items():
        for task in arr:
            try:
                task.get(timeout=60 * 20)
            except KeyboardInterrupt:
                raise
            except:
                traceback.print_exc()

        pools[idf].close()

    for sendobj in sync_list_output:
        DAL.store_predictions_v1(client, sendobj)

    print("/performance/")
    print("started:")
    print(_dtnow)
    print("ended:")
    print(dtNow())
    print("/exited/")
    print("")

    log = createLogger("predictions_v1_info", "predictions_v1_info")
    log.info("/performance/")
    log.info("started:")
    log.info(str(_dtnow))
    log.info("ended:")
    log.info(str(dtNow()))
    log.info("/exited/")
    log.info("")
Example #24
client = DAL.openConnection()
db = client.crypto

if not len(sys.argv) >= 3:
    print("expected exchange and symbol parameters, e.g. binance BTCUSDT ")
    sys.exit(0)

exchange = sys.argv[1]
symbol = sys.argv[2]

INTERVAL = 30
if len(sys.argv) >= 4:
    INTERVAL = int(sys.argv[3])

currentDateTime = dtNow().replace(second=0, microsecond=0)
if len(sys.argv) >= 6:
    currentDateTime = datetime.datetime.strptime(
        sys.argv[5], '%Y-%m-%dT%H:%M'
    )  # in future the user may send datetime from another tz, use dtLocal()
    if currentDateTime > dtNow():
        currentDateTime = dtNow().replace(second=0, microsecond=0)

maxDateTimeExcluded = currentDateTime
if INTERVAL > 1:  #  and INTERVAL <= 60
    maxDateTimeExcluded = currentDateTime.replace(
        minute=currentDateTime.minute - (currentDateTime.minute % INTERVAL))

WINDOW = 1440
if len(sys.argv) >= 5:  # value in minutes
    WINDOW = int(sys.argv[4])
Example #25
    def __init__(self):
        self.CryptoMapping = list(CRYPTO_socialKeywords.items())
        self.client = DAL.openConnection()
        self.alive_counter = dtNow()