Example #1
def get_position_prediction_move(symbol):
    features = get_features_stats(symbol)

    current_values = {}
    current_values['Change'] = features['Current Change']
    current_values['MACD'] = features['Current MACD']

    with open(get_root_dir() +
              "/data/backtest/{}/stats.json".format(symbol)) as f:
        stats = json.load(f)

    current_position = "None"

    if stats['open'] > 0:
        trades = pd.read_csv(get_root_dir() +
                             "/data/backtest/{}/trades.csv".format(symbol))
        pos = trades['Type'].iloc[-1]
        if pos == "BUY":
            current_position = 'LONG'
        elif pos == "SELL":
            current_position = 'SHORT'
        elif pos == "REJECTION":
            pos = trades['Type'].iloc[-2]

            if pos == "BUY":
                current_position = 'LONG'
            elif pos == "SELL":
                current_position = 'SHORT'
            else:
                current_position = 'REJECTION'

    current_prediction = "None"

    json_info = get_json_info(symbol)

    if ((current_values['MACD'] > json_info['long_macd_threshold'])
            and (current_values['Change'] < json_info['long_per_threshold'])):
        current_prediction = "LONG"
    elif ((current_values['MACD'] < json_info['short_macd_threshold'])
          and (current_values['Change'] > json_info['short_per_threshold'])):
        current_prediction = "SHORT"

    current_move = "HODL"

    if current_position == "LONG":
        if current_prediction == "SHORT":
            current_move = "CLOSE"
    elif current_position == "SHORT":
        if current_prediction == "LONG":
            current_move = "CLOSE"
    else:
        current_move = "WAIT AND WATCH"

    return current_values, current_position, current_prediction, current_move
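A minimal usage sketch, assuming the backtest artifacts for the symbol already exist under data/backtest/<symbol>/ (stats.json, trades.csv) and data/parameters.json has been written; 'BTC' here is only an illustration:

values, position, prediction, move = get_position_prediction_move('BTC')
print(values)       # {'Change': ..., 'MACD': ...}
print(position)     # 'LONG', 'SHORT', 'REJECTION' or 'None'
print(prediction)   # 'LONG', 'SHORT' or 'None'
print(move)         # 'CLOSE', 'HODL' or 'WAIT AND WATCH'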
Example #2
def get_features_stats(symbol):
    stats = {}

    curr_folder = "algorithm/data/backtest/{}".format(symbol)

    features = pd.read_csv(curr_folder + "/data.csv")

    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)

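    # 'Change' below is the percent move of Open versus 4 rows earlier
    # (presumably 2 hours at the default 30-minute bars).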
    features['Change'] = (
        (features['Open'] / features.shift(4)['Open']) - 1) * 100
    features['Change'] = features['Change'].fillna(0)

    stats['MACD Long Fulfilled'] = len(
        features[features['macd'] > json_info['long_macd_threshold']])
    stats['Long Change Fulfilled'] = len(
        features[features['Change'] < json_info['long_per_threshold']])
    stats['Long All Fulfilled'] = len(
        features[(features['macd'] > json_info['long_macd_threshold'])
                 & (features['Change'] < json_info['long_per_threshold'])])

    stats['MACD Short Fulfilled'] = len(
        features[features['macd'] < json_info['short_macd_threshold']])
    stats['Short Change Fulfilled'] = len(
        features[features['Change'] > json_info['short_per_threshold']])
    stats['Short All Fulfilled'] = len(
        features[(features['macd'] < json_info['short_macd_threshold'])
                 & (features['Change'] > json_info['short_per_threshold'])])

    stats['Current Change'] = features.iloc[-1]['Change']
    stats['Current MACD'] = features.iloc[-1]['macd']

    return stats
Example #3
def get_file_name():
    fname = get_root_dir() + '/data/temp/rescraped.csv'

    if not os.path.isfile(fname):
        # create output file and add header
        with open(fname, 'w') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

            header = [
                'timestamp', 'id', 'text', 'likes', 'retweets', 'username',
                'user_id', 'user_created_at', 'in_response_to',
                'in_response_to_user_id', 'response_type', 'has_geolocation',
                'is_verified', 'total_tweets', 'total_followers',
                'total_following', 'total_likes', 'total_lists',
                'has_background', 'is_protected', 'default_profile'
            ]

            writer.writerow(header)

    return fname
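Example #13 below calls a write_csv helper that is not shown in these examples; a minimal sketch of what it might look like, assuming it simply appends one row to the file prepared by get_file_name() above (the real implementation may differ):

def write_csv(row):
    # hypothetical helper: append a single rescraped-tweet row to rescraped.csv
    with open(get_file_name(), 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(row)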
def twitter_stream():
    df = pd.read_csv(os.path.join(get_root_dir(), 'keywords.csv'))
    search_query = []

    for row in df['Keywords']:
        currKeywords = [x.strip() for x in row.split(',')]
        search_query = search_query + currKeywords

    logger = logging.getLogger(__name__)

    # auth & api handlers
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    print('Authenticated as %s' % api.me().screen_name)

    myStreamListener = MyStreamListener()
    myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)

    while True:
        print("Starting stream tracking")
        try:
            myStream.filter(track=search_query, languages=['en'])
        except Exception as e:
            print('error')
            # Network error or stream failing behind
            # https://github.com/tweepy/tweepy/issues/448
            # prevent stream from crashing & attempt to recover
            logger.info(e)
            print(e)
            continue
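MyStreamListener is referenced above but not defined in these examples; a minimal sketch of a tweepy 3.x listener that appends incoming tweets to the daily CSV created by the get_file_name() below. Only a few illustrative fields are captured here, not the full header, so treat it as an assumption rather than the project's actual listener:

class MyStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        # append a simplified row for each matching tweet
        with open(get_file_name(), 'a') as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
            writer.writerow([status.created_at, status.id_str, status.text,
                             status.favorite_count, status.retweet_count,
                             status.user.screen_name])

    def on_error(self, status_code):
        # stop on rate limiting (HTTP 420) instead of retrying aggressively
        if status_code == 420:
            return False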
def get_file_name():
    fname = os.path.join(
        get_root_dir(),
        'data/twitter_stream/' + str(datetime.datetime.now().date()) + '.csv')
    
    stream_dir = os.path.join(get_root_dir(), 'data/twitter_stream')

    if not os.path.isdir(stream_dir):
        os.makedirs(stream_dir)

    if not os.path.isfile(fname):
        # create output file and add header
        with open(fname, 'w') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)

            header = [
                'timestamp', 'id', 'text', 'likes', 'retweets', 'username',
                'user_id', 'user_created_at', 'in_response_to',
                'in_response_to_user_id', 'response_type', 'has_geolocation',
                'is_verified', 'total_tweets', 'total_followers',
                'total_following', 'total_likes', 'total_lists',
                'has_background', 'is_protected', 'default_profile'
            ]

            writer.writerow(header)
    
    return fname
Example #6
def weekly_process():
    dir = get_root_dir()
    storagefolder = os.path.join(dir, 'data/storage/all_cleaned')

    storagesfiles = glob(storagefolder + "/*")
    combined = merge_csvs(storagesfiles)
    df = pd.read_csv(combined)

    df['Time'] = pd.to_datetime(df['Time'])
    df = create_cascades(df)

    df = df.sort_values('Time')

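    # Cascades seen more than 3 times whose root tweet id is missing from the
    # data are collected below and rescraped.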
    counts = df['cascade'].value_counts().reset_index()
    ids_count = counts[counts['cascade'] > 3][['index']]
    non_existing = ids_count[~ids_count['index'].isin(df['ID'])]

    df, profile = rescrape_and_add(df, non_existing)
    df = df.merge(profile[['username', 'total_followers']],
                  left_on='User',
                  right_on='username',
                  how='inner')
    df = df.rename(columns={
        'Time': 'time',
        'total_followers': 'magnitude',
        'User': 'user_id'
    })
    counts = df['cascade'].value_counts().reset_index()

    df = df[df['cascade'].isin(counts[counts['cascade'] > 2]['index'])]
    oldcascade_file = os.path.join(dir, 'data/storage/old_cascade.csv')
    df = df[['ID', 'time', 'magnitude', 'user_id', 'cascade']]

    if os.path.isfile(oldcascade_file):
        old_file = pd.read_csv(oldcascade_file)
        old_file = old_file[old_file['cascade'].isin(df['cascade'])]
        df.to_csv(oldcascade_file, index=None)

        df = pd.concat([df, old_file])
        df = df.reset_index()
    else:
        df.to_csv(oldcascade_file, index=None)

    new_inf = add_influence_and_all(df)
    curr_inf = pd.read_csv(os.path.join(dir, 'data/userwise_influence.csv'))
    combined_inf = add_inf(curr_inf, new_inf)

    if 'old_file' in locals():
        to_remove = add_influence_and_all(old_file.drop('inf', axis=1))
        combined_inf = sub_inf(combined_inf, to_remove)  #test sub

    combined_inf.to_csv(os.path.join(dir, 'data/userwise_influence.csv'),
                        index=None)

    #tar and remove all_cleaned
    tar_and_remove(storagesfiles)
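merge_csvs is used above but not shown; since its result is passed to pd.read_csv, it presumably returns the path of a single concatenated file. A minimal sketch under that assumption (paths and behaviour are illustrative, not the project's actual helper):

def merge_csvs(files):
    # hypothetical: concatenate CSVs that share a header into one temp file
    combined_path = os.path.join(get_root_dir(), 'data/temp/combined.csv')
    pd.concat([pd.read_csv(f) for f in files]).to_csv(combined_path, index=None)
    return combined_path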
Example #7
def get_sentiment(df):
    '''
    Adds sentiment to the df
    '''
    root_dir = get_root_dir()

    # s = SentimentIntensityAnalyzer()
    # df['vader_emotion'] = df['Tweet'].swifter.apply(lambda x: s.polarity_scores(x)['compound'])

    cop = df['Tweet'].copy()
    
    cop = cop.fillna("NA")

    cop = cop.replace(r'\\n',' ', regex=True) 
    cop = cop.replace(r'\n',' ', regex=True) 

    cop = cop.replace(r'\\r',' ', regex=True) 
    cop = cop.replace(r'\r',' ', regex=True) 

    cop = cop.replace(r'\\t',' ', regex=True) 
    cop = cop.replace(r'\t',' ', regex=True) 

    tempFolder = os.path.join(root_dir, "data/temp")
    tempFile = os.path.join(tempFolder, "tweets")
    outputFile = os.path.join(tempFolder, "tweets0_out.txt")

    cop.to_csv(tempFile, index=None, header=None)

    if os.path.isfile(outputFile):
        os.remove(outputFile)

    sentiFolder = os.path.join(root_dir, "utils")

    command = "java -jar {} sentidata {} input {}".format(
        os.path.join(sentiFolder, "SentiStrength.jar"),
        os.path.join(sentiFolder, "SentiStrength_Data/"), tempFile)
    print(command)

    process = Popen(command.split())
    process.wait()

    os.remove(tempFile)

    aa = pd.read_csv(outputFile, sep="\t")
    df = df.join(aa[['Positive', 'Negative']])
    os.remove(outputFile)

    df['pos_neg'] = df['Positive'] + df['Negative']

    df = df.drop(['Positive', 'Negative'], axis=1)

    return df
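A usage sketch, assuming SentiStrength.jar and its SentiStrength_Data folder are present under utils/ as the command above expects. SentiStrength scores positive sentiment as +1..+5 and negative as -1..-5, so pos_neg is their sum per tweet:

tweets = pd.DataFrame({'Tweet': ['bitcoin is mooning!', 'terrible dump today']})
tweets = get_sentiment(tweets)
print(tweets[['Tweet', 'pos_neg']])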
Example #8
def add_keyword(df, drop_non_existing=False):
    '''
    Adds keyword to the df

    Parameters:
    ___________

    df (Dataframe): The dataframe

    drop_non_existing (Boolean): To drop non existing or not
    '''
    # TODO: do cascading here too; if more than 5% of a cascade's tweets contain a keyword, assign that keyword to the entire cascade.

    current = get_root_dir()
    path = os.path.join(current, "keywords.csv")

    keywords = pd.read_csv(path)
    keywords = keywords.set_index('Symbol')

    symbol_keyword = {}

    for idx, row in keywords.iterrows():
        currKeywords = [x.strip().lower() for x in row['Keywords'].split(',')]
        symbol_keyword[idx] = currKeywords
    

    #maybe a crypto keyword? Bitcoin should be last
    def find_which(x):
        x = x.lower()

        nonlocal symbol_keyword

        matches = []
        
        for idx,row in symbol_keyword.items():
            for keyword in row:
                if keyword in x:
                    matches.append(idx)
        
        matches = list(set(matches))

        if len(matches) == 1:
            return matches[0]
        elif len(matches) == 2:
            if 'BTC' in matches:
                req_index = 1 - matches.index('BTC')
                return matches[req_index]
        
        return "invalid"

    df['keyword'] = df['Tweet'].swifter.apply(find_which)
    return df
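A usage sketch of the matching behaviour, assuming keywords.csv maps, say, BTC to 'bitcoin, btc' and ETH to 'ethereum, eth' (the real file drives the actual mapping): a tweet matching exactly one symbol gets that symbol, a tweet matching a symbol plus BTC gets the other symbol, and anything else is 'invalid'.

sample = pd.DataFrame({'Tweet': [
    'Ethereum looks strong today',         # -> 'ETH'
    'bitcoin and ethereum are both up',    # BTC dropped in the tie-break -> 'ETH'
    'nothing crypto related here',         # -> 'invalid'
]})
sample = add_keyword(sample)
print(sample['keyword'].tolist())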
Example #9
def add_influence_and_all(df):
    d = df.groupby('cascade').apply(get_influence)
    d = d.drop_duplicates()

    invalid = d[d['inf'] == 'invalid']
    invalid.to_csv(get_root_dir() + "/data/storage/invalid.csv", mode='a')
    d = d[d['inf'] != 'invalid']
    df = df.merge(d, on='ID')
    df = df.drop('cascade_y', axis=1).rename(columns={'cascade_x': 'cascade'})
    new_inf = df.groupby('user_id').apply(get_influence_metrics)

    new_inf = new_inf.reset_index().rename(columns={'user_id': 'username'})
    return new_inf
Example #10
def price_stream():
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')

    coin_dir = get_root_dir() + "/data/price"

    if not os.path.isdir(coin_dir):
        os.makedirs(coin_dir)
    
    for idx, row in keywords.iterrows():
        start_timestamp = 1561939200000

        exchange_name = row['exchange_name']
        pairname = row['pair_name'].replace('/', '')

        current_file = coin_dir + "/{}.csv".format(row['Symbol'])
        print(current_file)
        
        all_df = pd.DataFrame()

        if os.path.isfile(current_file):
            all_df = pd.read_csv(current_file)

            if len(all_df) > 0:
                all_df['Time'] = pd.to_datetime(all_df['Time'])
                start_timestamp = all_df['Time'].astype(int).iloc[-1] // 10**6

        start_timestamp = int(start_timestamp)
        
        if row['exchange_name'] == 'Binance':
            curr_df = get_binance_data(pairname, start_timestamp)
        elif row['exchange_name'] == 'Bitfinex':
            curr_df = get_bitfinex_data(pairname, start_timestamp)
        else:
            # no fetcher for this exchange; skip rather than reuse a stale curr_df
            continue

        full_df = pd.concat([all_df, curr_df])
        full_df = clean_price(full_df)

        full_df.to_csv(current_file, index=None)
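get_binance_data and get_bitfinex_data are not shown; a minimal sketch of one way the Binance side could fetch 30-minute klines from the public REST endpoint, starting at a millisecond timestamp. This is an illustration only; the project's real fetcher may use a different interval, pagination, or client library:

import requests

def get_binance_data(pairname, start_timestamp):
    rows = []
    url = "https://api.binance.com/api/v3/klines"
    while True:
        params = {'symbol': pairname, 'interval': '30m',
                  'startTime': start_timestamp, 'limit': 1000}
        batch = requests.get(url, params=params).json()
        if not batch:
            break
        rows.extend(batch)
        # next request starts just after the last candle's close time
        start_timestamp = batch[-1][6] + 1
        if len(batch) < 1000:
            break
    df = pd.DataFrame(rows, columns=[
        'OpenTime', 'Open', 'High', 'Low', 'Close', 'Volume', 'CloseTime',
        'QuoteVolume', 'Trades', 'TakerBase', 'TakerQuote', 'Ignore'])
    df['Time'] = pd.to_datetime(df['OpenTime'], unit='ms')
    return df[['Time', 'Open', 'High', 'Low', 'Close', 'Volume']]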
Example #11
def tar_and_remove(files):
    archive_dir = get_root_dir() + "/data/archive"

    if not os.path.isdir(archive_dir):
        os.makedirs(archive_dir)

    tar = tarfile.open(archive_dir + "/{}.tar.gz".format(int(time.time())),
                       "w:gz",
                       compresslevel=5)

    for name in files:
        tar.add(name)

    tar.close()

    for file in files:
        os.remove(file)
def get_price(symbol, duration='30Min'):
    '''
    Returns the price history for `symbol`, resampled into `duration` buckets
    (30 minutes by default). Downloads the price data first if it is missing.
    '''

    dir = get_root_dir()

    fname = dir + "/data/price/{}.csv".format(symbol)
    if not os.path.isfile(fname):
        print('Price data has not been downloaded. Starting Download. This might take some time')
        from price_stream import price_stream
        price_stream()

    df = pd.read_csv(fname)
    df['Time'] = pd.to_datetime(df['Time'])

    price_df = df.groupby(pd.Grouper(key='Time', freq=duration,
                                     label='right')).apply(merge_time)
    price_df = price_df.reset_index()
    return price_df
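merge_time, applied to each 30-minute bucket above, is not shown; a minimal sketch of the kind of OHLCV aggregation it could perform (an assumption, not the project's actual implementation):

def merge_time(group):
    # collapse all raw rows that fall in one time bucket into a single OHLCV row
    if group.empty:
        return None
    return pd.Series({
        'Open': group['Open'].iloc[0],
        'High': group['High'].max(),
        'Low': group['Low'].min(),
        'Close': group['Close'].iloc[-1],
        'Volume': group['Volume'].sum(),
    })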
Example #13
def rescrape_and_add(original, to_scrape):
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    print("Rescraping {} tweets".format(len(to_scrape)))
    for i in range(100, len(to_scrape) + 100, 100):
        print("{} {}".format(i - 100, i))

        tweets = api.statuses_lookup(list(to_scrape['index'][i - 100:i].values),
                                     tweet_mode='extended')

        for tweet in tweets:
            response_type = 'tweet'
            in_response_to = None
            in_response_to_user_id = None

            try:
                in_response_to = tweet.in_reply_to_status_id
                in_response_to_user_id = tweet.in_reply_to_user_id_str
            except:
                pass

            if in_response_to is None:
                if hasattr(tweet, 'retweeted_status'):
                    response_type = 'retweet'
                    in_response_to = tweet.retweeted_status.id
                    in_response_to_user_id = tweet.retweeted_status.user._json[
                        'id_str']  #probably not required
                else:
                    if hasattr(tweet, 'quoted_status'):
                        response_type = 'quoted_retweet'
                        in_response_to = tweet.quoted_status.id
                        in_response_to_user_id = tweet.quoted_status.user._json[
                            'id_str']  #probably not required
                    else:
                        in_response_to = '0'
            else:
                response_type = 'reply'

            tweetText = ''
            try:
                tweetText = tweetText + tweet.extended_tweet['full_text']
            except:
                try:
                    tweetText = tweetText + tweet.full_text
                except:
                    pass

            try:
                tweetText = tweetText + ' <retweeted_status> ' + tweet.retweeted_status.extended_tweet[
                    'full_text'] + ' </retweeted_status>'
            except:
                try:
                    tweetText = tweetText + ' <retweeted_status> ' + tweet.retweeted_status.text + ' </retweeted_status>'
                except:
                    pass

            try:
                tweetText = tweetText + ' <quoted_status> ' + tweet.quoted_status.extended_tweet[
                    'full_text'] + ' </quoted_status>'
            except:
                try:
                    tweetText = tweetText + ' <quoted_status> ' + tweet.quoted_status.text + ' </quoted_status>'
                except:
                    pass

            if 'urls' in tweet.entities:
                for url in tweet.entities['urls']:
                    try:
                        tweetText = tweetText.replace(url['url'],
                                                      url['expanded_url'])
                    except:
                        pass

            write_csv([
                tweet.created_at, tweet.id, tweetText, tweet.favorite_count,
                tweet.retweet_count, tweet.user.screen_name,
                tweet.user._json['id_str'], tweet.user._json['created_at'],
                in_response_to, in_response_to_user_id, response_type,
                tweet.user.geo_enabled, tweet.user.verified,
                tweet.user.statuses_count, tweet.user.followers_count,
                tweet.user.friends_count, tweet.user.favourites_count,
                tweet.user.listed_count,
                tweet.user.profile_use_background_image, tweet.user.protected,
                tweet.user.default_profile
            ])

    rescraped = pd.read_csv(get_root_dir() + '/data/temp/rescraped.csv')
    profile = pd.read_csv(
        os.path.join(get_root_dir(), 'data/cleaned_profile.csv'))

    original['Time'] = pd.to_datetime(original['Time'])

    original['Time'] = original['Time'].astype(int) // 10**9
    rescraped_df, rescraped_profile = processor(rescraped)
    non_existing = to_scrape[~to_scrape['index'].isin(rescraped['id'])]

    virtual_tweets = non_existing['index'].apply(make_tweets,
                                                 original=original)
    rescraped = pd.concat([virtual_tweets,
                           rescraped_df]).reset_index(drop=True)
    virtual_tweets['User'] = virtual_tweets['User'].str.lower()
    rescrape = virtual_tweets[~virtual_tweets['User'].isin(profile['username']
                                                           )]
    ps = profileScraper()
    scraped = ps.query_profile(rescrape['User'].values)
    scraped_profile = process_scraped_profile(scraped)

    # clean them separately before concatenating
    new_profile = pd.concat([scraped_profile, rescraped_profile, profile])
    new_profile = new_profile.drop_duplicates(subset=['username']).reset_index(
        drop=True)
    new_profile.to_csv(os.path.join(get_root_dir(),
                                    'data/cleaned_profile.csv'),
                       index=None)

    rescraped = get_sentiment(rescraped)
    new_df = pd.concat([original, rescraped])
    new_df = new_df.sort_values('Time')

    return new_df, new_profile
Example #14
def get_json_info(symbol):
    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)

    return json_info
Example #15
    df = df.resample('1D').apply(resampler)
    df = df.reset_index()
    df = df[['Time', 'Open']].merge(portfolioValue,
                                    left_on='Time',
                                    right_on='Date').drop('Time', axis=1)
    df['hodl'] = (initial_cash / df['Open'].iloc[0]) * df['Open']
    df = df.drop('Open', axis=1)

    df.to_csv(curr_dir + '/portfolio.csv', index=None)
    trades.to_csv(curr_dir + '/trades.csv', index=None)
    operations.to_csv(curr_dir + '/operations.csv', index=None)

    with open(os.path.join(curr_dir, "data.json"), 'w') as fp:
        json.dump(json_data, fp)


if __name__ == "__main__":
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')

    for idx, row in keywords.iterrows():
        perform_backtest(row['Symbol'],
                         n_fast_par=n_fast_par,
                         n_slow_par=n_slow_par,
                         long_macd_threshold_par=long_macd_threshold_par,
                         long_per_threshold_par=long_per_threshold_par,
                         long_close_threshold_par=long_close_threshold_par,
                         short_macd_threshold_par=short_macd_threshold_par,
                         short_per_threshold_par=short_per_threshold_par,
                         short_close_threshold_par=short_close_threshold_par,
                         initial_cash=initial_cash,
                         comission=comission)
Example #16
def get_keywords():
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')
    return keywords
Example #17
def perform_backtest(symbol_par,
                     n_fast_par,
                     n_slow_par,
                     long_macd_threshold_par,
                     long_per_threshold_par,
                     long_close_threshold_par,
                     short_macd_threshold_par,
                     short_per_threshold_par,
                     short_close_threshold_par,
                     initial_cash=10000,
                     comission=0.1,
                     df=None):
    '''
    Parameters:
    __________

    symbol_par (string):
    The symbol to use

    n_fast_par (int):
    Fast EMA period used during MACD calculation

    n_slow_par (int):
    Slower EMA period used during MACD calculation

    long_macd_threshold_par (int):
    The threshold of normalized macd, above which we might open a long position

    long_per_threshold_par (int):
    The percentage change over the last 2 hours below which we might open a long position
    #Might make this a parameter too

    long_close_threshold_par (int):
    Threshold of normalized macd, below which we will close the opened long position

    short_macd_threshold_par (int):
    The threshold of normalized macd, below which we might open a short position

    short_per_threshold_par (int):
    The percentage change over the last 2 hours above which we might open a short position

    short_close_threshold_par (int):
    Threshold of normalized macd, above which we will close the opened short position

    initial_cash (int) (optional):
    The cash to start from. Defaults to 10000.

    comission (float) (optional):
    Commission as a percentage. Defaults to 0.1%, which is higher than typical, to stay on the safe side.

    df (DataFrame) (optional):
    Used as the features dataframe if given; otherwise the coin's features file is read.
    '''
    global n_fast
    global n_slow

    global long_macd_threshold
    global long_per_threshold
    global long_close_threshold
    global short_macd_threshold
    global short_per_threshold
    global short_close_threshold
    global symbol

    n_fast = n_fast_par
    n_slow = n_slow_par

    long_macd_threshold = long_macd_threshold_par
    long_per_threshold = long_per_threshold_par
    long_close_threshold = long_close_threshold_par
    short_macd_threshold = short_macd_threshold_par
    short_per_threshold = short_per_threshold_par
    short_close_threshold = short_close_threshold_par
    symbol = symbol_par

    json_info = {}
    json_info['n_fast'] = n_fast
    json_info['n_slow'] = n_slow
    json_info['long_macd_threshold'] = long_macd_threshold_par
    json_info['long_per_threshold'] = long_per_threshold_par
    json_info['long_close_threshold'] = long_close_threshold_par
    json_info['short_macd_threshold'] = short_macd_threshold_par
    json_info['short_per_threshold'] = short_per_threshold_par
    json_info['short_close_threshold'] = short_close_threshold_par
    json_info['initial_cash'] = initial_cash
    json_info['comission'] = comission

    with open(get_root_dir() + "/data/parameters.json", 'w') as f:
        json.dump(json_info, f)

    features_file = get_root_dir() + "/data/features/{}.csv".format(symbol)

    if df is None:
        df = pd.read_csv(features_file)

    df['macd'] = ta.trend.macd(df['sentistrength_total'],
                               n_fast=n_fast,
                               n_slow=n_slow,
                               fillna=True)
    df['macd'] = df['macd'].fillna(0)

    df['Time'] = pd.to_datetime(df['Time'])

    json_data = {}

    json_data['mean'] = df['macd'].mean()
    json_data['std'] = df['macd'].std()

    df['macd'] = (df['macd'] - json_data['mean']) / json_data['std']

    df = df.dropna(subset=['Time'])

    curr_dir = get_root_dir() + "/data/backtest/{}".format(symbol)

    if not os.path.exists(curr_dir):
        os.makedirs(curr_dir)

    fig = create_plot(df, 'macd', 'SentiStength')

    plotly_json = fig.to_json()

    html = fig.to_html()

    with open(curr_dir + '/plotly.html', 'w') as file:
        file.write(html)

    with open(curr_dir + '/plotly.json', 'w') as file:
        file.write(plotly_json)

    df = df[['Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'macd']]
    df.to_csv(os.path.join(curr_dir, "data.csv"), index=None)

    data = PandasData_Custom(dataname=df)
    cerebro = bt.Cerebro(cheat_on_open=True, maxcpus=None)
    cerebro.adddata(data)

    cerebro.addstrategy(tradeStrategy)

    cerebro.addanalyzer(bt.analyzers.SharpeRatio_A)
    cerebro.addanalyzer(bt.analyzers.Calmar)
    cerebro.addanalyzer(bt.analyzers.DrawDown)
    cerebro.addanalyzer(bt.analyzers.Returns)
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer)

    cerebro.broker.setcash(initial_cash)
    cerebro.broker.setcommission(comission / 100)

    run = cerebro.run()

    analysis = run[0].analyzers.getbyname('tradeanalyzer').get_analysis()

    trade_analyzer = {}
    trade_analyzer['total'] = analysis['total']['total']
    trade_analyzer['open'] = analysis['total']['open']
    trade_analyzer['closed'] = analysis['total']['closed']

    with open(curr_dir + "/stats.json", 'w') as f:
        json.dump(trade_analyzer, f)

    portfolioValue, trades, operations = run[0].get_logs()

    # fig = cerebro.plot()
    # figure = fig[0][0]
    # figure.savefig(curr_dir + "/backtest.png")

    output_file(curr_dir + "/backtest.html")
    b = Bokeh(style='bar', plot_mode="tabs", scheme=Tradimo())
    b.plot_result(run)

    df = df.set_index('Time')
    df = df.resample('1D').apply(resampler)
    df = df.reset_index()
    df = df[['Time', 'Open']].merge(portfolioValue,
                                    left_on='Time',
                                    right_on='Date').drop('Time', axis=1)
    df['hodl'] = (initial_cash / df['Open'].iloc[0]) * df['Open']
    df = df.drop('Open', axis=1)

    df.to_csv(curr_dir + '/portfolio.csv', index=None)
    trades.to_csv(curr_dir + '/trades.csv', index=None)
    operations.to_csv(curr_dir + '/operations.csv', index=None)

    with open(os.path.join(curr_dir, "data.json"), 'w') as fp:
        json.dump(json_data, fp)
Example #18
def get_features(tweet_df,
                 price_df,
                 coin_name,
                 curr_start,
                 curr_end,
                 minutes=30):
    '''
    Parameters:
    ___________
    tweet_df (DataFrame):
    Dataframe of tweets for the current coin
    
    price_df (DataFrame):
    Dataframe of price of the current coin
    
    coin_name (string):
    Name of coin
    
    curr_start (Timestamp):
    Starting time of the current all_cleaned batch

    curr_end (Timestamp):
    Ending time of the current all_cleaned batch

    minutes (int) (optional):
    Bucket size in minutes. Defaults to 30.
    '''
    features_dir = get_root_dir() + "/data/features"

    if not os.path.isdir(features_dir):
        os.makedirs(features_dir)

    features_file = features_dir + '/{}.csv'.format(coin_name)

    userwise_inf_file = os.path.join(get_root_dir(),
                                     'data/userwise_influence.csv')
    userwise_inf = pd.read_csv(userwise_inf_file)

    tweet_df['Time'] = pd.to_datetime(tweet_df['Time'])
    tweet_df = tweet_df.sort_values('Time')

    tweet_df = tweet_df.merge(
        userwise_inf[['username', 'avg_influence', 'total_influence']],
        left_on='User',
        right_on='username',
        how='left')
    # Unknown users fall back to roughly half the average influence; these
    # fallback values shrink as the dataset grows.
    tweet_df['avg_influence'] = tweet_df['avg_influence'].fillna(2)
    tweet_df['total_influence'] = tweet_df['total_influence'].fillna(6)

    price_df = price_df[(price_df['Time'] >= curr_start)
                        & (price_df['Time'] <= curr_end)].reset_index(
                            drop=True)
    features = tweet_df.groupby('Time').apply(tweets_to_features)
    features = price_df.merge(features, how='left', on='Time')
    features = features.fillna(0)

    if os.path.isfile(features_file):
        features = pd.concat([pd.read_csv(features_file), features])
        features['Time'] = pd.to_datetime(features['Time'])
        features = features.sort_values('Time')
        features = features.drop_duplicates('Time', keep='last')

    features.to_csv(features_file, index=None)

    return features
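tweets_to_features, applied per timestamp above, is not shown; whatever it returns must at least provide the sentistrength_total column that perform_backtest (Example #17) feeds into the MACD. A minimal sketch under that assumption, weighting each tweet's pos_neg sentiment by the tweeting user's influence (column names beyond those are illustrative):

def tweets_to_features(group):
    return pd.Series({
        'tweet_count': len(group),
        'sentistrength_total': (group['pos_neg'] * group['avg_influence']).sum(),
    })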
Example #19
if __name__ == "__main__":
    twitterThread = Thread(target=twitter_stream)
    twitterThread.start()

    print('Started Live Tweet Collection')
    price_stream()
    #Streaming price first time

    print('Started Live Price Collection')

    priceFlag = Event()
    thread = MyThread(priceFlag, price_stream, 30)
    thread.start()

    dir = get_root_dir()
    temp_dir = os.path.join(dir, 'data/temp')

    if not os.path.isdir(temp_dir):
        os.makedirs(temp_dir)

    oneFlag = Event()
    oneThread = MyThread(oneFlag, one_minute_cleaning, 60)
    oneThread.start()

    tenFlag = Event()
    tenThread = MyThread(tenFlag, ten_minute_profile, 60)
    tenThread.start()

    tenBacktestFlag = Event()
    tenBacktestThread = MyThread(tenBacktestFlag, ten_minutes_backtest, 60)
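MyThread is used above but not defined in these examples; a minimal sketch of a repeating worker thread that calls a function every interval seconds until its Event is set (an assumption about the project's helper, not its actual code):

from threading import Thread, Event

class MyThread(Thread):
    def __init__(self, stop_event, func, interval):
        super().__init__()
        self.stop_event = stop_event
        self.func = func
        self.interval = interval

    def run(self):
        # Event.wait returns False on timeout, so this loops until the event is set
        while not self.stop_event.wait(self.interval):
            self.func()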
Example #20
def index(request):
    files = glob('algorithm/data/backtest/*')

    coinwise_stats = {}
    combined_portfolio = pd.DataFrame()
    btc = pd.read_csv('algorithm/data/backtest/BTC/portfolio.csv')
    combined_portfolio['Date'] = btc['Date']
    dates = []

    for file in files:
        symbol = file.split('/')[-1].replace('.csv', '')
        curr_df, coinwise_stats[symbol] = get_stats(symbol)
        combined_portfolio = curr_df.merge(combined_portfolio,
                                           on='Date',
                                           how='right')

    combined_portfolio['btc_portfolio'] = btc['hodl']
    combined_portfolio['btc_portfolio'] = combined_portfolio[
        'btc_portfolio'] * (len(combined_portfolio.columns) - 2)

    df = pd.DataFrame.from_dict(coinwise_stats)
    df = df.T

    combined_portfolio = combined_portfolio.set_index('Date')

    print(combined_portfolio.columns)
    div = pd.DataFrame(
        ((combined_portfolio.iloc[-1] / combined_portfolio.iloc[0]) - 1) *
        100).reset_index()
    div.columns = ['Symbol', 'Change']

    div['Change'] = div['Change'].round(2)
    div = div.sort_values('Change', ascending=False)

    combined_portfolio['portfolio'] = combined_portfolio.drop(
        'btc_portfolio', axis=1).sum(axis=1)
    combined_portfolio = combined_portfolio[['portfolio', 'btc_portfolio']]

    combined_portfolio = combined_portfolio.reset_index()
    combined_portfolio = combined_portfolio.rename(columns={'Date': 'Time'})

    fig = create_plot(combined_portfolio, 'portfolio', 'Portfolio Movement',
                      'btc_portfolio', 'Bitcoin HODL Portfolio')
    html = fig.to_html()

    with open('interface/static/interface/plotly.html', 'w') as file:
        file.write(html)

    dictionary = dict(zip(div.Symbol, div.Change))
    print(dictionary)

    top_ten = ['TRX', 'OMG', 'MIOTA', 'ZEC', 'LTC', 'ETC', 'XTZ', 'BSV', 'SAN']

    btc['Date'] = pd.to_datetime(btc['Date'])
    top_df = df.loc[top_ten]

    forward_metrics = {}
    forward_metrics['Started From'] = btc.iloc[0]['Date'].strftime('%Y-%m-%d')
    forward_metrics['Traded for'] = str(
        (btc.iloc[-1]['Date'] - btc.iloc[0]['Date']).days) + ' days'
    forward_metrics['Total Return'] = str(
        round(
            (sum(df['end_cash']) / sum(df['start_cash']) - 1) * 100, 2)) + " %"
    forward_metrics['Total Return - Predetermined Coins'] = str(
        round((sum(top_df['end_cash']) / sum(top_df['start_cash']) - 1) * 100,
              2)) + " %"
    forward_metrics['Return VS hodl all coins'] = str(
        round((sum(df['end_cash']) / sum(df['end_hodl']) - 1) * 100, 2)) + " %"

    # forward_metrics['Bitcoin hodl VS Portfolio'] = dictionary['btc_portfolio']
    del dictionary['btc_portfolio']

    symbols = get_symbols()

    #Features info calculation
    files = glob('algorithm/data/backtest/*')
    features_df = pd.DataFrame()
    position_df = pd.DataFrame(
        columns=['Symbol', 'Change', 'MACD', 'Position', 'Prediction', 'Move'])

    for file in files:
        symbol = file.split('/')[-1].replace('.csv', '')
        curr_features = get_features_stats(symbol)
        curr_features['Symbol'] = symbol
        features_df = features_df.append(pd.Series(curr_features),
                                         ignore_index=True)
        current_values, current_position, current_prediction, current_move = get_position_prediction_move(
            symbol)
        position_df = position_df.append(pd.Series({
            'Symbol': symbol,
            'Change': round(current_values['Change'], 2),
            'MACD': round(current_values['MACD'], 2),
            'Position': current_position,
            'Prediction': current_prediction,
            'Move': current_move
        }), ignore_index=True)

    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)

    features_df = features_df[['Symbol'] + list(features_df.columns[:-1])]

    features_df = features_df.sort_values('Current MACD').reset_index(
        drop=True)
    features_df = features_df.round(2)

    predictions_df = position_df[
        position_df['Prediction'] != "None"].reset_index(drop=True)
    position_df = position_df[position_df['Position'] != 'None'].reset_index(
        drop=True)

    return render(
        request, "interface/index.html", {
            'forward_metrics': forward_metrics,
            'current_parameters': json_info,
            'all_time_coinwise': dictionary,
            'symbols': symbols,
            'features': features_df.values.tolist(),
            'features_header': list(features_df.columns),
            'predictions': predictions_df.values.tolist(),
            'positions_predictions_headers': list(position_df.columns),
            'positions': position_df.values.tolist()
        })