Example #1
def update_signal_recalculate():

    dthour = request.args.get('datetime')
    du = DateUtil()
    specific_hour_dt = du.parse_time_string(dthour)
    predictions = []
    coins = generate_coins(specific_hour_dt)

    for coin in coins:

        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)

        one_day_before = specific_hour_dt + timedelta(days=-1)
        coin.loadtime = one_day_before.strftime("%Y-%m-%d")
        print("checking pred in DB for ", coin, "at", specific_hour_dt)

        tsfrom = (specific_hour_dt +
                  timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
        tsto = specific_hour_dt.strftime("%Y-%m-%d %H:00:00")
        print(tsfrom, tsto)

        do_prepare(coin, specific_hour_dt)
        predictions = do_predict(coin, predictions, specific_hour_dt)

    return jsonify({'predictions': predictions})
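The endpoint above depends on DateUtil.parse_time_string, which is not shown in these examples. A minimal sketch of the assumed behavior (timestamps like "2018-04-14 09:00:00"), not the project's actual implementation:

from datetime import datetime

class DateUtil:
    def parse_time_string(self, timestr):
        # assumption: the 'datetime' query parameter uses this exact format
        return datetime.strptime(timestr, "%Y-%m-%d %H:%M:%S")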
Example #2
def get_signal_history():

    dthour = request.args.get('datetime')
    du = DateUtil()
    specific_hour_dt = du.parse_time_string(dthour)
    predictions = []
    coins = generate_coins(specific_hour_dt)
    coininfo = CoinInfo()

    for coin in coins:

        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)

        one_day_before = specific_hour_dt + timedelta(days=-1)
        coin.loadtime = one_day_before.strftime("%Y-%m-%d")
        print("checking pred in DB for ", coin, "at", specific_hour_dt)

        tsfrom = (specific_hour_dt +
                  timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
        tsto = specific_hour_dt.strftime("%Y-%m-%d %H:00:00")
        print(tsfrom, tsto)

        pred = dbconn.check_prediction_in_db(coin, tsfrom, tsto)
        print("pred:", pred)

        if pred is None:
            coininfo.do_prepare(coin, specific_hour_dt)
            predictions = do_predict(coin, predictions, specific_hour_dt)
        else:
            predictions.append(pred)

    return jsonify({'predictions': predictions})
Example #3
    def get_nr_of_hour_distance_from_server(self):
        nowdt = datetime.now()
        dateutil = DateUtil()
        nowmillis = dateutil.unix_time_millis(nowdt)
        server_time = self.get_server_time()['serverTime']
        # Binance serverTime is epoch milliseconds; convert the gap to whole hours
        diff = (nowmillis - server_time) / 1000 / 3600
        return round(diff)
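unix_time_millis is not shown in these examples; a minimal sketch of what it is assumed to do (naive local datetime to epoch milliseconds):

from datetime import datetime

def unix_time_millis(dt):
    # hypothetical helper: milliseconds between dt and the Unix epoch
    epoch = datetime(1970, 1, 1)
    return (dt - epoch).total_seconds() * 1000.0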
Example #4
    def test_dateutil1(self):
        dateutil = DateUtil()
        ba = BinanceApi()
        print(ba.get_server_time())
        nowdt = datetime.now() + timedelta(hours=-2)
        binancedt_int = dateutil.binance_datetime_int(nowdt)
        print("binancedt_int ", binancedt_int)
Example #5
    def get_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour,
                                                 n_hours):
        coinname = coin.name
        coinnameupper = coinname.upper()
        offset_from_server = self.get_nr_of_hour_distance_from_server()
        # first, subtract the server offset from the requested hour
        targethour = specific_hour + timedelta(hours=-1 * offset_from_server)

        # subtract one more hour because the server's endTime means
        # "up to, but not including, the next hour"
        targethour = targethour + timedelta(hours=-1)
        date_util = DateUtil()
        end_time = round(date_util.unix_time_millis(targethour))

        apiurl = ('https://api.binance.com/api/v1/klines?symbol=' +
                  coinnameupper + 'BTC&endTime=' + str(end_time) +
                  '&interval=1h&limit=' + str(n_hours))

        print("calling: " + apiurl)
        response = requests.get(apiurl)
        return response.json()
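The same request could also be built with requests' params argument, which handles URL encoding; a sketch under the same assumptions, not the code used above:

import requests

def get_klines(symbol_btc_pair, end_time_ms, n_hours):
    # e.g. symbol_btc_pair = 'ETHBTC'; endTime is epoch milliseconds
    resp = requests.get('https://api.binance.com/api/v1/klines',
                        params={'symbol': symbol_btc_pair,
                                'endTime': end_time_ms,
                                'interval': '1h',
                                'limit': n_hours})
    return resp.json()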
Example #6
def get_signal():
    predictions = []
    try:

        du = DateUtil()
        last_round_hour = du.parse_time_string(du.last_round_hour())

        coins = generate_coins(last_round_hour)

        for coin in coins:
            pred = dbconn.check_prediction_in_db_last_hour(coin)
            if pred is None:
                do_prepare(coin, last_round_hour)
                predictions = do_predict(coin, predictions, last_round_hour)
            else:
                predictions.append(pred)
        return jsonify({'predictions': predictions})
    except Exception as e:
        print("Exception: ", e)
        return jsonify({'exception': str(e)})
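last_round_hour is not shown either; a minimal sketch of the assumed behavior (the current hour, truncated, as a string that parse_time_string accepts):

from datetime import datetime

def last_round_hour():
    # hypothetical helper: now, rounded down to the whole hour
    return datetime.now().strftime("%Y-%m-%d %H:00:00")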
Example #7
def fill_past_signals():

    predictions = []
    coins = generate_coins(datetime.now())

    coininfo = CoinInfo()
    du = DateUtil()
    for coin in coins:

        earliest_date = dbconn.get_earliest_date_in_db(coin)
        print(coin.name, "earliest date", earliest_date)

        start_datetime = du.parse_time_string(
            du.round_datetime_down(earliest_date))
        last_round_hour = du.parse_time_string(du.last_round_hour())
        # skip ahead 24 hours so every prediction has a full day of earlier data
        curr_datetime = start_datetime + timedelta(hours=+24)
        while curr_datetime < last_round_hour:
            coin.reset_data_frames()
            one_day_before = curr_datetime + timedelta(days=-1)
            coin.loadtime = one_day_before.strftime("%Y-%m-%d")

            curr_datetime = curr_datetime + timedelta(hours=+1)
            print("checking pred in DB for ", coin, "at", curr_datetime)

            tsfrom = (curr_datetime +
                      timedelta(hours=-1)).strftime("%Y-%m-%d %H:00:00")
            tsto = curr_datetime.strftime("%Y-%m-%d %H:00:00")
            print(tsfrom, tsto)

            pred = dbconn.check_prediction_in_db(coin, tsfrom, tsto)
            print("pred:", pred)

            if pred is None:
                coininfo.do_prepare(coin, curr_datetime)
                predictions = do_predict(coin, predictions, curr_datetime)
            else:
                predictions.append(pred)

    return jsonify({'predictions': predictions})
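round_datetime_down is likewise not shown; assuming it accepts a datetime and truncates to the hour, a plausible sketch is:

def round_datetime_down(dt):
    # hypothetical: truncate to the whole hour, returned as a string
    return dt.strftime("%Y-%m-%d %H:00:00")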
Example #8
def do_predict(coin, predictions, specific_hour):

    ##print(coin.gtdf.head())
    data = coin.pricehourly.copy()

    convert_hour_col(data)
    times = pd.DatetimeIndex(data['datetime'])

    cointrain = CoinTrain()
    X_gtdf = cointrain.increase_by_one_hour(coin.gtdf)
    X_grtdf = cointrain.increase_by_one_hour(coin.grtdf)

    X = data
    # each (year, month, day, hour) group holds a single row, so sum() just copies open
    gX = X.groupby([times.year, times.month, times.day, times.hour]).open.sum()
    gXdf = pd.DataFrame(gX)
    # likewise, max() over single-row groups just copies each column
    gXdf['high'] = X.groupby([times.year, times.month, times.day,
                              times.hour])['high'].max()
    gXdf['low'] = X.groupby([times.year, times.month, times.day,
                             times.hour])['low'].max()
    gXdf['close'] = X.groupby([times.year, times.month, times.day,
                               times.hour])['close'].max()
    gXdf['volumefrom'] = X.groupby(
        [times.year, times.month, times.day, times.hour])['volumefrom'].max()
    gXdf['volumeto'] = X.groupby(
        [times.year, times.month, times.day, times.hour])['volumeto'].max()
    gXdf['high_raised'] = X.groupby(
        [times.year, times.month, times.day, times.hour])['high_raised'].max()
    gXdf['low_raised'] = X.groupby(
        [times.year, times.month, times.day, times.hour])['low_raised'].max()
    gXdf['close_raised'] = X.groupby(
        [times.year, times.month, times.day,
         times.hour])['close_raised'].max()

    print("X_grtdf")
    #print(X_grtdf)
    print("X_gtdf")
    #print(X_gtdf)
    cols = [
        'retweeter_followers', 'retweet_count', 'sum_posmulrfollower',
        'sum_negmulrfollower', 'sum_neumulrfollower', 'sum_compmulrfollower'
    ]
    #type(data)
    # gXdf['retweeter_followers']=coin.grtdf['retweeter_followers']
    # gXdf['retweet_count']=coin.grtdf['retweet_count']
    # data[]=coin.grtdf[]

    print("renaming cols")
    gXdf.index = gXdf.index.rename(['year', 'month', 'day', 'hour'])

    gXdf = gXdf.merge(X_grtdf, how='left', left_index=True, right_index=True)
    Xdf = gXdf.merge(X_gtdf, how='left', left_index=True, right_index=True)
    data = Xdf

    data.reset_index(inplace=True)
    cointrain.spreadtweeteffect(data)

    cointrain.add_change_columns(data)

    data.fillna(0, inplace=True)
    data.drop(
        columns=[
            'year',
            'month',
            'hour',
            'open',
            'high',
            'low',
            'close',
            'volumefrom',
            'volumeto',
            #                   'vf_change1','vt_change1',
            #                   'vfvt_ratio','vtvf_ratio',
            #                    'c_o_change', 'h_o_change', 'l_o_change',
            #        'c_o_change1', 'h_o_change1', 'l_o_change1', 'o_change1', 'o_change2',
            #        'o_change3', 'o_change4', 'o_change5', 'o_change6', 'o_change1_3',
            #        'o_change1_12',
            #                     'retweeter_followers',
            #        'retweet_count',
            #        'sum_posmulrfollower', 'sum_negmulrfollower',
            #        'sum_neumulrfollower', 'sum_compmulrfollower', 'follower_count',
            #        'tweet_count',
            #        'sum_posmulfollower', 'sum_negmulfollower',
            #        'sum_neumulfollower', 'sum_compmulfollower',
            'asia_market',
            'eu_market',
            'us_market',
            'day',
            'max_datetime_x',
            'max_datetime_y'
        ],
        inplace=True)

    #data = data[COLS]
    coin.data_to_predict = data
    print(data.tail())
    mismatch = data.columns != shouldbe_cols
    if mismatch.any():
        print("Columns/Features are not the same as in the model, exiting")
        print("which col not equal: ", mismatch)
        print("shouldbe_cols ")
        print(shouldbe_cols)
        print('data.columns')
        print(data.columns)
        exit(1)

    print("saving to storeage... ")
    spec_hour_str = str(specific_hour.strftime("%Y-%m-%d_%H-%M-%S"))
    coin.save_to_storeage(PHASE, tmpdir='runtime/' + spec_hour_str + '/')
    print("saving is done.")

    min_max_scaler = coin.read_scaler()
    scaled_data = min_max_scaler.transform(data)
    data = pd.DataFrame(scaled_data)

    #print(data.tail())

    # custom metric functions, required as custom_objects when loading the saved model
    def precision(y_true, y_pred):
        threshold = 0.3
        mult = 0.5 / threshold
        true_positives = K.sum(K.round(K.clip(y_true * y_pred * mult, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred * mult, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def recall(y_true, y_pred):
        threshold = 0.3
        mult = 0.5 / threshold
        true_positives = K.sum(K.round(K.clip(y_true * y_pred * mult, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    #metrics=[precision,recall,'accuracy']
    metrics = {"precision": precision, 'recall': recall}
    print("loading model for coin:" + coin.name)
    #model = load_model("./data/altcoin-storage/"+coin.name+"_keras_model.h5",custom_objects=metrics)
    # load the model once via the Prediction helper
    p = Prediction()
    model = p.load_model(coin, metrics)

    print("doing predictions.")
    pred = model.predict(data)
    print("predictions are ready.")

    coinbinancename = coin.name.upper() + "BTC"
    chance = pred[-1][0]

    signal = 0
    treshold = coin.treshold
    if chance > treshold:
        signal = 1

    # generate and persist the prediction record
    specific_hour_minus_one = specific_hour + timedelta(hours=-1)
    pred = p.generate_prediction(specific_hour_minus_one, specific_hour,
                                 coinbinancename, chance, treshold, signal)
    dbconn.save_predictions([pred])
    predictions.append(pred)

    return predictions
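For reference, the thresholded precision metric above can be written in plain NumPy; scaling by 0.5/threshold before rounding makes round() act as a cut at approximately `threshold`:

import numpy as np

def precision_np(y_true, y_pred, threshold=0.3):
    mult = 0.5 / threshold
    true_positives = np.sum(np.round(np.clip(y_true * y_pred * mult, 0, 1)))
    predicted_positives = np.sum(np.round(np.clip(y_pred * mult, 0, 1)))
    return true_positives / (predicted_positives + 1e-7)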
Example #9
    def test_dateutil0(self):
        dateutil = DateUtil()

        print(
            datetime.fromtimestamp(
                int("1523689200")).strftime('%Y-%m-%d %H:%M:%S'))
Example #10
    def __init__(self):
        print("init TweetIO")
        self.dateutil = DateUtil()
Example #11
class TweetIO:
    def __init__(self):
        print("init TweetIO")
        self.dateutil = DateUtil()

    def read_db_tweet_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour, n_hours=48):

        df = pd.DataFrame(columns=['id', 'text', 'user_id', 'user_history_row_id', 'user_name',
                                   'retweet_count', 'favorite_count', 'timestamp', 'lang'])

        tsto = self.dateutil.round_datetime_down(specific_hour)
        tsfrom = self.dateutil.round_datetime_down(specific_hour + timedelta(hours=-1 * n_hours))

        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_tweet(tsfrom, tsto, tag, df)
        return df



    def read_db_tweet_last_n_hour_by_coin(self, coin):

        df = pd.DataFrame(columns=['id', 'text', 'user_id', 'user_history_row_id', 'user_name',
                                   'retweet_count', 'favorite_count', 'timestamp', 'lang'])

        tsfrom = self.dateutil.two_days_ago()
        tsto = self.dateutil.last_round_hour()

        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_tweet(tsfrom, tsto, tag, df)
        return df



    def read_db_tweet(self, tsfrom, tsto, searchtext, df=None):
        # avoid a mutable default argument: a shared default DataFrame
        # would accumulate rows across calls
        if df is None:
            df = pd.DataFrame(columns=['id', 'text', 'user_id', 'user_history_row_id',
                                       'user_name', 'retweet_count', 'favorite_count',
                                       'timestamp', 'lang'])

        select_query = """
        select * from tweet where created_at > '""" + tsfrom + """' and created_at < '""" + tsto + """' and text like
        '%""" + searchtext + """%' and lang like 'en';
        """
        print(select_query)

        rows = dbconn.query(select_query)

        i = len(df)
        print("len rows", len(rows))
        print("i from ", i)

        for dbrow in rows:
            df.at[i, 'id'] = dbrow['id']
            df.at[i, 'text'] = dbrow['text']
            df.at[i, 'user_id'] = dbrow['user_id_str']
            df.at[i, 'user_history_row_id'] = dbrow['user_history_row_id']
            df.at[i, 'user_name'] = dbrow['user_name']
            df.at[i, 'retweet_count'] = dbrow['retweet_count']
            df.at[i, 'favorite_count'] = dbrow['favorite_count']
            df.at[i, 'timestamp'] = dbrow['created_at']
            df.at[i, 'lang'] = dbrow['lang']
            i += 1
        print("i until ", i)
        print("len(df)", len(df))
        return df


    def read_db_retweet_last_n_hour_by_specific_hour_by_coin(self, coin, specific_hour, n_hours=48):

        df = pd.DataFrame(columns=['retweet_id', 'orig_tweet_id',
                                   'retweeter_followers', 'retweet_created_at',
                                   'user_history_row_id', 'user_id_str'])

        tsto = self.dateutil.round_datetime_down(specific_hour)
        tsfrom = self.dateutil.round_datetime_down(specific_hour + timedelta(hours=-1 * n_hours))

        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_retweet(tsfrom, tsto, tag, df)
        return df


    def read_db_retweet_last_n_hour_by_coin(self, coin):

        df = pd.DataFrame(columns=['retweet_id', 'orig_tweet_id',
                                   'retweeter_followers', 'retweet_created_at',
                                   'user_history_row_id', 'user_id_str'])

        tsfrom = self.dateutil.two_days_ago()
        tsto = self.dateutil.last_round_hour()

        for tag in coin.hashtags:
            print("getting ", tag, " from: ", tsfrom, " to: ", tsto)
            df = self.read_db_retweet(tsfrom, tsto, tag, df)
        return df


    def read_db_retweet(self, tsfrom, tsto, searchtext, df=None):
        # avoid a mutable default argument: a shared default DataFrame
        # would accumulate rows across calls
        if df is None:
            df = pd.DataFrame(columns=['retweet_id', 'orig_tweet_id',
                                       'retweeter_followers', 'retweet_created_at',
                                       'user_history_row_id', 'user_id_str'])

        select_query = """
        select * from retweet where retweet_created_at > '""" + tsfrom + """' and retweet_created_at < '""" + tsto + """' and
         orig_tweet_id in (select id from tweet where text like '%""" + searchtext + """%'  and lang like 'en');
        """
        print(select_query)

        rows = dbconn.query(select_query)

        i = len(df)
        for dbrow in rows:
            df.at[i, 'retweet_id'] = dbrow['retweet_id']
            df.at[i, 'orig_tweet_id'] = dbrow['orig_tweet_id']
            df.at[i, 'retweeter_followers'] = dbrow['retweeter_followers']
            df.at[i, 'retweet_created_at'] = dbrow['retweet_created_at']
            df.at[i, 'user_history_row_id'] = dbrow['user_history_row_id']
            df.at[i, 'user_id_str'] = dbrow['user_id_str']
            i += 1
        return df

    def read_db_referenced_users(self, coin):
        tdf = coin.tweets
        rtdf = coin.retweets
        referenced_user_ids = []
        for index, row in tdf.iterrows():
            referenced_user_ids.append(row['user_history_row_id'])
        for index, row in rtdf.iterrows():
            referenced_user_ids.append(row['user_history_row_id'])

        return self.read_db_users(referenced_user_ids)

    def read_db_users(self, referenced_user_ids):
        df = pd.DataFrame(columns=['user_row_id', 'twitter_user_id', 'user_name',
                                   'follower_count', 'friends_count', 'listed_count',
                                   'favourites_count', 'statuses_count', 'user_created_at'])
        # strip the surrounding brackets from the list's repr to get a
        # comma-separated id list for the IN clause
        ruids = str(referenced_user_ids)[1:-1]
        select_query = """
        select * from tweet_user_history where id in (""" + ruids + """);
        """
        print(select_query)
        rows = dbconn.query(select_query)

        i = len(df)
        for dbrow in rows:
            df.at[i, 'user_row_id'] = dbrow['id']
            df.at[i, 'twitter_user_id'] = dbrow['user_id']
            df.at[i, 'user_name'] = dbrow['user_name']
            df.at[i, 'follower_count'] = dbrow['followers_count']
            df.at[i, 'friends_count'] = dbrow['friends_count']
            df.at[i, 'listed_count'] = dbrow['listed_count']
            df.at[i, 'favourites_count'] = dbrow['favourites_count']
            df.at[i, 'statuses_count'] = dbrow['statuses_count']
            df.at[i, 'user_created_at'] = dbrow['user_created_at']
            i += 1
        return df


    def read_all_scraped_tweet(self, coin, tmpdir=''):
        coin_name = coin.name
        tweetdir = './data/' + tmpdir + 'altcoin-tweets/' + coin_name + '/'
        print("reading tweet files from: " + tweetdir)
        ci = CoinInfo()
        dflist = []
        # avoid shadowing the built-ins list() and dir()
        tweetfiles = ci.list_tweetfiles(tweetdir)
        for tweetfile in tweetfiles:
            print("reading in: " + tweetfile)
            df = pd.read_json(path_or_buf=tweetfile)
            dflist.append(df)
        df = pd.concat(dflist)
        print("collected tweets: " + str(df['id'].count()))
        print("dropping duplicate tweets: ")
        df = df.drop_duplicates('id')
        print("collected tweets: " + str(df['id'].count()))
        df.reset_index(inplace=True)
        return df

    def read_all_scraped_retweet(self, coin, tmpdir=''):
        coin_name = coin.name
        retweetdir = './data/' + tmpdir + 'altcoin-tweets/' + coin_name + '/'
        print("reading RETWEET files from: " + retweetdir)
        ci = CoinInfo()
        dflist = []
        retweetfiles = ci.list_retweetfiles(retweetdir)
        for retweetfile in retweetfiles:
            print("reading in: " + retweetfile)
            df = pd.read_csv(retweetfile)
            dflist.append(df)
        df = pd.concat(dflist)
        print("collected retweets: " + str(df['retweet_id'].count()))
        print("dropping duplicate retweets: ")
        df = df.drop_duplicates('retweet_id')
        print("collected retweets: " + str(df['retweet_id'].count()))
        df.reset_index(inplace=True)
        return df

    def read_users_for_tweets(self, coin, tmpdir=''):
        cname = coin.name
        userdf = pd.read_csv('./data/' + tmpdir + 'altcoin-tweets/' + cname +
                             '/users_of_' + cname + '.csv')
        print("before merge: coin.tweets", len(coin.tweets.index))
        print("before merge: userdf", len(userdf.index))
        #print(coin.tweets.head())
        coin.tweets = coin.tweets.merge(userdf, left_on='user', right_on='t_userid', how='inner')

    def sort_and_clip(self, df, date):
        df['tstamp'] = pd.to_datetime(df["timestamp"])
        df['t_int'] = pd.to_numeric(df['tstamp'])
        df = df.sort_values('t_int')
        df = df[df['t_int'] > self.date_to_int(date)]

        #df['score']=(1+df['likes']*1+df['replies']*1)*1#+df['replies']

        df['count'] = 1
        df['# Tweets Cumulative'] = df['count'].cumsum()
        #df['Likes, Replies, Retweets']=df['score']*1
        return df

    def date_to_int(self, strclipdate):
        t = pd.to_datetime(strclipdate)
        s = pd.Series([t])
        ti = pd.to_numeric(s)
        return ti.at[0]
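The string-concatenated queries in TweetIO are vulnerable to SQL injection if the hashtags or timestamps are ever untrusted. A parameterized sketch, assuming dbconn.query accepted DB-API style parameters (which these examples do not show):

select_query = """
select * from tweet
where created_at > %s and created_at < %s
  and text like %s and lang like 'en';
"""
rows = dbconn.query(select_query, (tsfrom, tsto, '%' + searchtext + '%'))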
Example #12
    def test_dateutil0(self):
        dateutil = DateUtil()

        self.print_unix_as_date("1523692799000")
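print_unix_as_date is not shown; given Example #9 and the millisecond timestamp passed here, a plausible sketch is:

from datetime import datetime

def print_unix_as_date(millis_str):
    # hypothetical helper: epoch milliseconds -> formatted local time
    dt = datetime.fromtimestamp(int(millis_str) / 1000)
    print(dt.strftime('%Y-%m-%d %H:%M:%S'))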