def update_signal_recalculate():
    dthour = request.args.get('datetime')
    du = DateUtil()
    specific_hour_dt = du.parse_time_string(dthour)
    predictions = []
    coins = generate_coins(specific_hour_dt)
    for coin in coins:
        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)
        one_day_before = specific_hour_dt + timedelta(days=-1)
        coin.loadtime = one_day_before.strftime("%Y-%m-%d")
        print("checking pred in DB for ", coin, "at", specific_hour_dt)
        tsfrom = (specific_hour_dt + timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
        tsto = specific_hour_dt.strftime("%Y-%m-%d %H:00:00")
        print(tsfrom, tsto)
        do_prepare(coin, specific_hour_dt)
        predictions = do_predict(coin, predictions, specific_hour_dt)
    return jsonify({'predictions': predictions})
def get_signal_history():
    dthour = request.args.get('datetime')
    du = DateUtil()
    specific_hour_dt = du.parse_time_string(dthour)
    predictions = []
    coins = generate_coins(specific_hour_dt)
    coininfo = CoinInfo()
    for coin in coins:
        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)
        one_day_before = specific_hour_dt + timedelta(days=-1)
        coin.loadtime = one_day_before.strftime("%Y-%m-%d")
        print("checking pred in DB for ", coin, "at", specific_hour_dt)
        tsfrom = (specific_hour_dt + timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
        tsto = specific_hour_dt.strftime("%Y-%m-%d %H:00:00")
        print(tsfrom, tsto)
        pred = dbconn.check_prediction_in_db(coin, tsfrom, tsto)
        print("pred:", pred)
        if pred is None:
            coininfo.do_prepare(coin, specific_hour_dt)
            predictions = do_predict(coin, predictions, specific_hour_dt)
        else:
            predictions.append(pred)
    return jsonify({'predictions': predictions})
def get_nr_of_hour_distance_from_server(self):
    # Hour offset between the local clock and the Binance server clock.
    nowdt = datetime.now()
    dateutil = DateUtil()
    nowmillis = dateutil.unix_time_millis(nowdt)
    server_time = self.get_server_time()['serverTime']
    # Both timestamps are in epoch milliseconds; convert the difference to hours.
    diff = (nowmillis - server_time) / 1000 / 3600
    return round(diff)
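# A minimal sketch of the offset arithmetic above, assuming DateUtil.unix_time_millis()
# converts a naive datetime to epoch milliseconds (a hypothetical equivalent is shown
# inline). If the local clock runs two hours ahead of the server, the offset rounds to 2.
from datetime import datetime

def _unix_time_millis(dt, epoch=datetime(1970, 1, 1)):
    # Assumed equivalent of DateUtil.unix_time_millis().
    return (dt - epoch).total_seconds() * 1000

local_now = datetime(2018, 4, 14, 12, 0, 0)                          # local clock, e.g. UTC+2
server_millis = _unix_time_millis(datetime(2018, 4, 14, 10, 0, 0))   # server reports UTC
offset_hours = round((_unix_time_millis(local_now) - server_millis) / 1000 / 3600)
print(offset_hours)  # -> 2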
def test_dateutil1(self):
    dateutil = DateUtil()
    ba = BinanceApi()
    print(ba.get_server_time())
    nowdt = datetime.now()
    nowdt = nowdt + timedelta(hours=-2)
    binancedt_int = dateutil.binance_datetime_int(nowdt)
    print("binancedt_int ", binancedt_int)
def get_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour, n_hours):
    coinname = coin.name
    coinnameupper = coinname.upper()
    offset_from_server = self.get_nr_of_hour_distance_from_server()
    # First, subtract the clock offset between this machine and the server.
    targethour = specific_hour + timedelta(hours=-1 * offset_from_server)
    # Second, subtract one more hour, because the server's endTime marks the
    # close of the candle, i.e. data runs up to the *next* round hour.
    targethour = targethour + timedelta(hours=-1)
    date_util = DateUtil()
    start_time = round(date_util.unix_time_millis(targethour))
    apiurl = ('https://api.binance.com/api/v1/klines?symbol=' + coinnameupper +
              'BTC&endTime=' + str(start_time) +
              '&interval=1h&limit=' + str(n_hours))
    print("calling: " + apiurl)
    response = requests.get(apiurl)
    return response.json()
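# An alternative sketch of the same klines request built with the `params`
# argument of requests.get(), which handles URL encoding and keeps the query
# readable. The endpoint and the `symbol`, `endTime`, `interval`, `limit`
# parameters mirror the hand-built URL above; `fetch_klines` itself is a
# hypothetical helper, not part of the codebase.
import requests

def fetch_klines(symbol, end_time_millis, n_hours):
    resp = requests.get(
        'https://api.binance.com/api/v1/klines',
        params={
            'symbol': symbol,            # e.g. 'ETHBTC'
            'endTime': end_time_millis,  # epoch milliseconds
            'interval': '1h',
            'limit': n_hours,
        },
    )
    resp.raise_for_status()  # surface HTTP errors instead of parsing error JSON
    return resp.json()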
def get_signal():
    predictions = []
    try:
        du = DateUtil()
        last_round_hour = du.parse_time_string(du.last_round_hour())
        coins = generate_coins(last_round_hour)
        for coin in coins:
            pred = dbconn.check_prediction_in_db_last_hour(coin)
            if pred is None:
                do_prepare(coin, last_round_hour)
                predictions = do_predict(coin, predictions, last_round_hour)
            else:
                predictions.append(pred)
        return jsonify({'predictions': predictions})
    except Exception as e:
        print("Exception: ", e)
        return jsonify({'exception': str(e)})
def fill_past_signals():
    predictions = []
    coins = generate_coins(datetime.now())
    coininfo = CoinInfo()
    du = DateUtil()
    for coin in coins:
        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)
        start_datetime = du.parse_time_string(du.round_datetime_down(earliest_date))
        last_round_hour = du.parse_time_string(du.last_round_hour())
        # Skip the first 24 hours so every prediction has a full day of history behind it.
        curr_datetime = start_datetime + timedelta(hours=+24)
        while curr_datetime < last_round_hour:
            coin.reset_data_frames()
            one_day_before = curr_datetime + timedelta(days=-1)
            coin.loadtime = one_day_before.strftime("%Y-%m-%d")
            curr_datetime = curr_datetime + timedelta(hours=+1)
            print("checking pred in DB for ", coin, "at", curr_datetime)
            tsfrom = (curr_datetime + timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
            tsto = curr_datetime.strftime("%Y-%m-%d %H:00:00")
            print(tsfrom, tsto)
            pred = dbconn.check_prediction_in_db(coin, tsfrom, tsto)
            print("pred:", pred)
            if pred is None:
                coininfo.do_prepare(coin, curr_datetime)
                predictions = do_predict(coin, predictions, curr_datetime)
            else:
                predictions.append(pred)
    return jsonify({'predictions': predictions})
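# A minimal sketch of the window arithmetic in the backfill loop above: each
# iteration advances one hour first, then checks the DB for the
# [curr - 1h, curr) window formatted as "%Y-%m-%d %H:00:00" strings.
from datetime import datetime, timedelta

curr = datetime(2018, 4, 14, 10) + timedelta(hours=1)  # the loop advances before checking
tsfrom = (curr + timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
tsto = curr.strftime("%Y-%m-%d %H:00:00")
print(tsfrom, tsto)  # -> 2018-04-14 10:00:00 2018-04-14 11:00:00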
def do_predict(coin, predictions, specific_hour):
    data = coin.pricehourly.copy()
    convert_hour_col(data)
    times = pd.DatetimeIndex(data['datetime'])
    cointrain = CoinTrain()
    X_gtdf = cointrain.increase_by_one_hour(coin.gtdf)
    X_grtdf = cointrain.increase_by_one_hour(coin.grtdf)
    X = data
    # The data is already hourly, so this group-by "sum" just copies the values.
    hourly = X.groupby([times.year, times.month, times.day, times.hour])
    gX = hourly['open'].sum()
    gXdf = pd.DataFrame(gX)
    # Likewise, these "max" aggregations are effectively per-hour copies.
    gXdf['high'] = hourly['high'].max()
    gXdf['low'] = hourly['low'].max()
    gXdf['close'] = hourly['close'].max()
    gXdf['volumefrom'] = hourly['volumefrom'].max()
    gXdf['volumeto'] = hourly['volumeto'].max()
    gXdf['high_raised'] = hourly['high_raised'].max()
    gXdf['low_raised'] = hourly['low_raised'].max()
    gXdf['close_raised'] = hourly['close_raised'].max()
    print("X_grtdf")
    #print(X_grtdf)
    print("X_gtdf")
    #print(X_gtdf)
    # Assembled but currently unused.
    cols = [
        'retweeter_followers', 'retweet_count', 'sum_posmulrfollower',
        'sum_negmulrfollower', 'sum_neumulrfollower', 'sum_compmulrfollower'
    ]
    # gXdf['retweeter_followers'] = coin.grtdf['retweeter_followers']
    # gXdf['retweet_count'] = coin.grtdf['retweet_count']
    print("renaming cols")
    gXdf.index = gXdf.index.rename(['year', 'month', 'day', 'hour'])
    gXdf = gXdf.merge(X_grtdf, how='left', left_index=True, right_index=True)
    Xdf = gXdf.merge(X_gtdf, how='left', left_index=True, right_index=True)
    data = Xdf
    data.reset_index(inplace=True)
    cointrain.spreadtweeteffect(data)
    cointrain.add_change_columns(data)
    data.fillna(0, inplace=True)
    data.drop(
        columns=[
            'year', 'month', 'hour',
            'open', 'high', 'low', 'close', 'volumefrom', 'volumeto',
            # 'vf_change1', 'vt_change1',
            # 'vfvt_ratio', 'vtvf_ratio',
            # 'c_o_change', 'h_o_change', 'l_o_change',
            # 'c_o_change1', 'h_o_change1', 'l_o_change1', 'o_change1', 'o_change2',
            # 'o_change3', 'o_change4', 'o_change5', 'o_change6', 'o_change1_3',
            # 'o_change1_12',
            # 'retweeter_followers',
            # 'retweet_count',
            # 'sum_posmulrfollower', 'sum_negmulrfollower',
            # 'sum_neumulrfollower', 'sum_compmulrfollower', 'follower_count',
            # 'tweet_count',
            # 'sum_posmulfollower', 'sum_negmulfollower',
            # 'sum_neumulfollower', 'sum_compmulfollower',
            'asia_market', 'eu_market', 'us_market',
            'day', 'max_datetime_x', 'max_datetime_y'
        ],
        inplace=True)
    #data = data[COLS]
    coin.data_to_predict = data
    print(data.tail())
    # Abort if the feature columns differ from what the model was trained on.
    if (data.columns != shouldbe_cols).any():
        print("Columns/Features are not the same as in the model, exiting")
        print("which col not equal: ", data.columns != shouldbe_cols)
        print("shouldbe_cols ")
        print(shouldbe_cols)
        print('data.columns')
        print(data.columns)
        exit(1)
    print("saving to storage...")
    spec_hour_str = str(specific_hour.strftime("%Y-%m-%d_%H-%M-%S"))
    coin.save_to_storeage(PHASE, tmpdir='runtime/' + spec_hour_str + '/')
    print("saving is done.")
    min_max_scaler = coin.read_scaler()
    scaled_data = min_max_scaler.transform(data)
    data = pd.DataFrame(scaled_data)
    #print(data.tail())

    def precision(y_true, y_pred):
        # Scaling by 0.5/threshold shifts the effective rounding cutoff
        # from 0.5 down to `threshold` (see the sketch after this function).
        threshold = 0.3
        mult = 0.5 / threshold
        true_positives = K.sum(K.round(K.clip(y_true * y_pred * mult, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred * mult, 0, 1)))
        return true_positives / (predicted_positives + K.epsilon())

    def recall(y_true, y_pred):
        threshold = 0.3
        mult = 0.5 / threshold
        true_positives = K.sum(K.round(K.clip(y_true * y_pred * mult, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        return true_positives / (possible_positives + K.epsilon())

    #metrics = [precision, recall, 'accuracy']
    metrics = {"precision": precision, 'recall': recall}
    print("loading model for coin:" + coin.name)
    #model = load_model("./data/altcoin-storage/"+coin.name+"_keras_model.h5", custom_objects=metrics)
    p = Prediction()
    model = p.load_model(coin, metrics)
    print("doing predictions.")
    pred = model.predict(data)
    print("predictions are ready.")
    coinbinancename = coin.name.upper() + "BTC"
    chance = pred[len(pred) - 1][0]
    signal = 0
    treshold = coin.treshold
    if chance > treshold:
        signal = 1
    # Build and persist the prediction for the (hour - 1, hour) window.
    du = DateUtil()
    specific_hour_minus_one = specific_hour + timedelta(hours=-1)
    pred = p.generate_prediction(specific_hour_minus_one, specific_hour,
                                 coinbinancename, chance, treshold, signal)
    dbconn.save_predictions([pred])
    predictions.append(pred)
    return predictions
def test_dateutil0(self):
    dateutil = DateUtil()
    print(datetime.fromtimestamp(int("1523689200")).strftime('%Y-%m-%d %H:%M:%S'))
class TweetIO:
    TWEET_COLS = ['id', 'text', 'user_id', 'user_history_row_id', 'user_name',
                  'retweet_count', 'favorite_count', 'timestamp', 'lang']
    RETWEET_COLS = ['retweet_id', 'orig_tweet_id', 'retweeter_followers',
                    'retweet_created_at', 'user_history_row_id', 'user_id_str']

    def __init__(self):
        print("init TweetIO")
        self.dateutil = DateUtil()

    def read_db_tweet_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour, n_hours=48):
        df = pd.DataFrame(columns=self.TWEET_COLS)
        tsto = self.dateutil.round_datetime_down(specific_hour)
        tsfrom = self.dateutil.round_datetime_down(specific_hour + timedelta(hours=-1 * n_hours))
        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_tweet(tsfrom, tsto, tag, df)
        return df

    def read_db_tweet_last_n_hour_by_coin(self, coin):
        df = pd.DataFrame(columns=self.TWEET_COLS)
        tsfrom = self.dateutil.two_days_ago()
        tsto = self.dateutil.last_round_hour()
        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_tweet(tsfrom, tsto, tag, df)
        return df

    def read_db_tweet(self, tsfrom, tsto, searchtext, df=None):
        # A mutable DataFrame default argument is shared across calls;
        # create a fresh one per call instead.
        if df is None:
            df = pd.DataFrame(columns=self.TWEET_COLS)
        # NOTE: the query is built by string concatenation, so the inputs must
        # be trusted; see the parameterized sketch after this class.
        select_query = """
            select * from tweet
            where created_at > '""" + tsfrom + """'
              and created_at < '""" + tsto + """'
              and text like '%""" + searchtext + """%'
              and lang like 'en';
        """
        print(select_query)
        rows = dbconn.query(select_query)
        i = len(df)
        print("len rows", len(rows))
        print("i from ", i)
        for dbrow in rows:
            df.at[i, 'id'] = dbrow['id']
            df.at[i, 'text'] = dbrow['text']
            df.at[i, 'user_id'] = dbrow['user_id_str']
            df.at[i, 'user_history_row_id'] = dbrow['user_history_row_id']
            df.at[i, 'user_name'] = dbrow['user_name']
            df.at[i, 'retweet_count'] = dbrow['retweet_count']
            df.at[i, 'favorite_count'] = dbrow['favorite_count']
            df.at[i, 'timestamp'] = dbrow['created_at']
            df.at[i, 'lang'] = dbrow['lang']
            i += 1
        print("i until ", i)
        print("len(df)", len(df))
        return df

    def read_db_retweet_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour, n_hours=48):
        df = pd.DataFrame(columns=self.RETWEET_COLS)
        tsto = self.dateutil.round_datetime_down(specific_hour)
        tsfrom = self.dateutil.round_datetime_down(specific_hour + timedelta(hours=-1 * n_hours))
        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_retweet(tsfrom, tsto, tag, df)
        return df

    def read_db_retweet_last_n_hour_by_coin(self, coin):
        df = pd.DataFrame(columns=self.RETWEET_COLS)
        tsfrom = self.dateutil.two_days_ago()
        tsto = self.dateutil.last_round_hour()
        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_retweet(tsfrom, tsto, tag, df)
        return df

    def read_db_retweet(self, tsfrom, tsto, searchtext, df=None):
        if df is None:
            df = pd.DataFrame(columns=self.RETWEET_COLS)
        select_query = """
            select * from retweet
            where retweet_created_at > '""" + tsfrom + """'
              and retweet_created_at < '""" + tsto + """'
              and orig_tweet_id in
                  (select id from tweet
                   where text like '%""" + searchtext + """%' and lang like 'en');
        """
        print(select_query)
        rows = dbconn.query(select_query)
        i = len(df)
        for dbrow in rows:
            df.at[i, 'retweet_id'] = dbrow['retweet_id']
            df.at[i, 'orig_tweet_id'] = dbrow['orig_tweet_id']
            df.at[i, 'retweeter_followers'] = dbrow['retweeter_followers']
            df.at[i, 'retweet_created_at'] = dbrow['retweet_created_at']
            df.at[i, 'user_history_row_id'] = dbrow['user_history_row_id']
            df.at[i, 'user_id_str'] = dbrow['user_id_str']
            i += 1
        return df

    def read_db_referenced_users(self, coin):
        tdf = coin.tweets
        rtdf = coin.retweets
        referenced_user_ids = []
        for index, row in tdf.iterrows():
            referenced_user_ids.append(row['user_history_row_id'])
        for index, row in rtdf.iterrows():
            referenced_user_ids.append(row['user_history_row_id'])
        return self.read_db_users(referenced_user_ids)

    def read_db_users(self, referenced_user_ids):
        df = pd.DataFrame(columns=['user_row_id', 'twitter_user_id', 'user_name',
                                   'follower_count', 'friends_count', 'listed_count',
                                   'favourites_count', 'statuses_count', 'user_created_at'])
        # str([1, 2]) gives '[1, 2]'; strip the brackets to get a SQL id list.
        ruids = str(referenced_user_ids)[1:-1]
        select_query = """
            select * from tweet_user_history where id in (""" + ruids + """);
        """
        print(select_query)
        rows = dbconn.query(select_query)
        i = len(df)
        for dbrow in rows:
            df.at[i, 'user_row_id'] = dbrow['id']
            df.at[i, 'twitter_user_id'] = dbrow['user_id']
            df.at[i, 'user_name'] = dbrow['user_name']
            df.at[i, 'follower_count'] = dbrow['followers_count']
            df.at[i, 'friends_count'] = dbrow['friends_count']
            df.at[i, 'listed_count'] = dbrow['listed_count']
            df.at[i, 'favourites_count'] = dbrow['favourites_count']
            df.at[i, 'statuses_count'] = dbrow['statuses_count']
            df.at[i, 'user_created_at'] = dbrow['user_created_at']
            i += 1
        return df

    def read_all_scraped_tweet(self, coin, tmpdir=''):
        coin_name = coin.name
        directory = './data/' + tmpdir + 'altcoin-tweets/' + coin_name + '/'
        print("reading tweet files from: " + directory)
        ci = CoinInfo()
        dflist = []
        for tweetfile in ci.list_tweetfiles(directory):
            print("reading in: " + tweetfile)
            dflist.append(pd.read_json(path_or_buf=tweetfile))
        df = pd.concat(dflist)
        print("collected tweets: " + str(df['id'].count()))
        print("dropping duplicate tweets: ")
        df = df.drop_duplicates('id')
        print("collected tweets: " + str(df['id'].count()))
        df.reset_index(inplace=True)
        return df

    def read_all_scraped_retweet(self, coin, tmpdir=''):
        coin_name = coin.name
        directory = './data/' + tmpdir + 'altcoin-tweets/' + coin_name + '/'
        print("reading RETWEET files from: " + directory)
        ci = CoinInfo()
        dflist = []
        for retweetfile in ci.list_retweetfiles(directory):
            print("reading in: " + retweetfile)
            dflist.append(pd.read_csv(retweetfile))
        df = pd.concat(dflist)
        print("collected retweets: " + str(df['retweet_id'].count()))
        print("dropping duplicate retweets: ")
        df = df.drop_duplicates('retweet_id')
        print("collected retweets: " + str(df['retweet_id'].count()))
        df.reset_index(inplace=True)
        return df

    def read_users_for_tweets(self, coin, tmpdir=''):
        cname = coin.name
        userdf = pd.read_csv('./data/' + tmpdir + 'altcoin-tweets/' + cname +
                             '/users_of_' + cname + '.csv')
        print("before merge: coin.tweets", len(coin.tweets.index))
        print("before merge: userdf", len(userdf.index))
        coin.tweets = coin.tweets.merge(userdf, left_on='user',
                                        right_on='t_userid', how='inner')

    def sort_and_clip(self, df, date):
        df['tstamp'] = pd.to_datetime(df["timestamp"])
        df['t_int'] = pd.to_numeric(df['tstamp'])
        df = df.sort_values('t_int')
        df = df[df['t_int'] > self.date_to_int(date)]
        # df['score'] = (1 + df['likes'] + df['replies'])  # unused engagement score
        df['count'] = 1
        df['# Tweets Cumulative'] = df['count'].cumsum()
        # df['Likes, Replies, Retweets'] = df['score']
        return df

    def date_to_int(self, strclipdate):
        t = pd.to_datetime(strclipdate)
        s = pd.Series([t])
        return pd.to_numeric(s).at[0]
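# A minimal sketch of the tweet query above using DB-API placeholders instead
# of string concatenation, which avoids SQL injection and quoting bugs. The
# actual dbconn.query() signature is unknown here, so this assumes a standard
# DB-API cursor (sqlite3 shown; psycopg2/MySQLdb use %s instead of ?), and
# read_db_tweet_safe is a hypothetical helper, not part of the codebase.
import sqlite3

def read_db_tweet_safe(conn, tsfrom, tsto, searchtext):
    cur = conn.cursor()
    cur.execute(
        "select * from tweet "
        "where created_at > ? and created_at < ? "
        "and text like ? and lang like 'en'",
        (tsfrom, tsto, '%' + searchtext + '%'),  # values bound by the driver
    )
    return cur.fetchall()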
def test_dateutil0(self):
    dateutil = DateUtil()
    self.print_unix_as_date("1523692799000")