def get_position_prediction_move(symbol):
    features = get_features_stats(symbol)

    current_values = {}
    current_values['Change'] = features['Current Change']
    current_values['MACD'] = features['Current MACD']

    with open(get_root_dir() + "/data/backtest/{}/stats.json".format(symbol)) as f:
        stats = json.load(f)

    current_position = "None"

    if stats['open'] > 0:
        trades = pd.read_csv(get_root_dir() + "/data/backtest/{}/trades.csv".format(symbol))
        pos = trades['Type'].iloc[-1]

        if pos == "BUY":
            current_position = 'LONG'
        elif pos == "SELL":
            current_position = 'SHORT'
        elif pos == "REJECTION":
            # the last order was rejected; fall back to the trade before it
            pos = trades['Type'].iloc[-2]
            if pos == "BUY":
                current_position = 'LONG'
            elif pos == "SELL":
                current_position = 'SHORT'
            else:
                current_position = 'REJECTION'

    current_prediction = "None"
    json_info = get_json_info(symbol)

    if ((current_values['MACD'] > json_info['long_macd_threshold'])
            and (current_values['Change'] < json_info['long_per_threshold'])):
        current_prediction = "LONG"
    elif ((current_values['MACD'] < json_info['short_macd_threshold'])
            and (current_values['Change'] > json_info['short_per_threshold'])):
        current_prediction = "SHORT"

    current_move = "HODL"

    if current_position == "LONG":
        if current_prediction == "SHORT":
            current_move = "CLOSE"
    elif current_position == "SHORT":
        if current_prediction == "LONG":
            current_move = "CLOSE"
    else:
        current_move = "WAIT AND WATCH"

    return current_values, current_position, current_prediction, current_move
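# A minimal usage sketch; 'BTC' is a hypothetical example and assumes its
# backtest artifacts already exist under data/backtest/BTC/. For illustration only.
def _example_position_prediction_move():
    values, position, prediction, move = get_position_prediction_move('BTC')
    # position:   'LONG' / 'SHORT' / 'REJECTION' / 'None'
    # prediction: 'LONG' / 'SHORT' / 'None'
    # move:       'CLOSE' when position and prediction oppose each other,
    #             'WAIT AND WATCH' with no open position, otherwise 'HODL'
    return values, position, prediction, move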
def get_features_stats(symbol):
    stats = {}
    curr_folder = "algorithm/data/backtest/{}".format(symbol)
    features = pd.read_csv(curr_folder + "/data.csv")

    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)

    # Percent change of Open vs. 4 bars earlier
    features['Change'] = ((features['Open'] / features.shift(4)['Open']) - 1) * 100
    features['Change'] = features['Change'].fillna(0)

    stats['MACD Long Fulfilled'] = len(
        features[features['macd'] > json_info['long_macd_threshold']])
    stats['Long Change Fulfilled'] = len(
        features[features['Change'] < json_info['long_per_threshold']])
    stats['Long All Fulfilled'] = len(
        features[(features['macd'] > json_info['long_macd_threshold'])
                 & (features['Change'] < json_info['long_per_threshold'])])

    stats['MACD Short Fulfilled'] = len(
        features[features['macd'] < json_info['short_macd_threshold']])
    stats['Short Change Fulfilled'] = len(
        features[features['Change'] > json_info['short_per_threshold']])
    stats['Short All Fulfilled'] = len(
        features[(features['macd'] < json_info['short_macd_threshold'])
                 & (features['Change'] > json_info['short_per_threshold'])])

    stats['Current Change'] = features.iloc[-1]['Change']
    stats['Current MACD'] = features.iloc[-1]['macd']

    return stats
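# Illustration of the 4-bar percent change behind the 'Change' column; at
# 30-minute candles, 4 bars span 2 hours. The prices below are made up.
def _example_change_column():
    toy = pd.DataFrame({'Open': [100.0, 101.0, 102.0, 103.0, 105.0, 99.0]})
    toy['Change'] = ((toy['Open'] / toy['Open'].shift(4)) - 1) * 100
    # -> [NaN, NaN, NaN, NaN, 5.0, -1.98]:
    #    105 is 5% above 100; 99 is ~2% below 101
    return toy['Change'].fillna(0)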
def get_file_name():
    fname = get_root_dir() + '/data/temp/rescraped.csv'

    if not os.path.isfile(fname):
        # create output file and add header
        with open(fname, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            header = [
                'timestamp', 'id', 'text', 'likes', 'retweets', 'username',
                'user_id', 'user_created_at', 'in_response_to',
                'in_response_to_user_id', 'response_type', 'has_geolocation',
                'is_verified', 'total_tweets', 'total_followers',
                'total_following', 'total_likes', 'total_lists',
                'has_background', 'is_protected', 'default_profile'
            ]
            writer.writerow(header)

    return fname
def twitter_stream():
    df = pd.read_csv(os.path.join(get_root_dir(), 'keywords.csv'))

    search_query = []
    for row in df['Keywords']:
        currKeywords = [x.strip() for x in row.split(',')]
        search_query = search_query + currKeywords

    logger = logging.getLogger(__name__)

    # auth & api handlers
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)
    print('Authenticated as %s' % api.me().screen_name)

    myStreamListener = MyStreamListener()
    myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)

    while True:
        print("Starting stream tracking")
        try:
            myStream.filter(track=search_query, languages=['en'])
        except Exception as e:
            # Network error or stream falling behind:
            # https://github.com/tweepy/tweepy/issues/448
            # prevent the stream from crashing & attempt to recover
            logger.info(e)
            print(e)
            continue
def get_file_name():
    fname = os.path.join(get_root_dir(), 'data/twitter_stream/' +
                         str(datetime.datetime.now().date()) + '.csv')

    stream_dir = os.path.join(get_root_dir(), 'data/twitter_stream')
    if not os.path.isdir(stream_dir):
        os.makedirs(stream_dir)

    if not os.path.isfile(fname):
        # create output file and add header
        with open(fname, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            header = ['timestamp', 'id', 'text', 'likes', 'retweets', 'username',
                      'user_id', 'user_created_at', 'in_response_to',
                      'in_response_to_user_id', 'response_type', 'has_geolocation',
                      'is_verified', 'total_tweets', 'total_followers',
                      'total_following', 'total_likes', 'total_lists',
                      'has_background', 'is_protected', 'default_profile']
            writer.writerow(header)

    return fname
def weekly_process():
    dir = get_root_dir()
    storagefolder = os.path.join(dir, 'data/storage/all_cleaned')
    storagesfiles = glob(storagefolder + "/*")

    combined = merge_csvs(storagesfiles)
    df = pd.read_csv(combined)
    df['Time'] = pd.to_datetime(df['Time'])
    df = create_cascades(df)
    df = df.sort_values('Time')

    # Cascades with more than 3 tweets whose root tweet id is missing from the
    # data ('index' holds the id, 'cascade' the count, per older-pandas
    # reset_index naming)
    counts = df['cascade'].value_counts().reset_index()
    ids_count = counts[counts['cascade'] > 3][['index']]
    non_existing = ids_count[~ids_count['index'].isin(df['ID'])]

    df, profile = rescrape_and_add(df, non_existing)
    df = df.merge(profile[['username', 'total_followers']],
                  left_on='User', right_on='username', how='inner')
    df = df.rename(columns={
        'Time': 'time',
        'total_followers': 'magnitude',
        'User': 'user_id'  # downstream code selects/groups on 'user_id'
    })

    # Keep only cascades with at least 3 tweets
    counts = df['cascade'].value_counts().reset_index()
    df = df[df['cascade'].isin(counts[counts['cascade'] > 2]['index'])]

    oldcascade_file = os.path.join(dir, 'data/storage/old_cascade.csv')
    df = df[['ID', 'time', 'magnitude', 'user_id', 'cascade']]

    if os.path.isfile(oldcascade_file):
        old_file = pd.read_csv(oldcascade_file)
        old_file = old_file[old_file['cascade'].isin(df['cascade'])]
        df.to_csv(oldcascade_file, index=None)
        df = pd.concat([df, old_file])
        df = df.reset_index()
    else:
        df.to_csv(oldcascade_file, index=None)

    new_inf = add_influence_and_all(df)
    curr_inf = pd.read_csv(os.path.join(dir, 'data/userwise_influence.csv'))
    combined_inf = add_inf(curr_inf, new_inf)

    if 'old_file' in locals():
        # Subtract influence already counted for the overlapping cascades
        to_remove = add_influence_and_all(old_file.drop('inf', axis=1))
        combined_inf = sub_inf(combined_inf, to_remove)  # test sub

    combined_inf.to_csv(os.path.join(dir, 'data/userwise_influence.csv'), index=None)

    # tar and remove all_cleaned
    tar_and_remove(storagesfiles)
def get_sentiment(df):
    '''
    Adds sentiment to the df
    '''
    root_dir = get_root_dir()

    # s = SentimentIntensityAnalyzer()
    # df['vader_emotion'] = df['Tweet'].swifter.apply(lambda x: s.polarity_scores(x)['compound'])

    # Strip escaped and literal newlines/tabs so each tweet stays on a single
    # line in the SentiStrength input file
    cop = df['Tweet'].copy()
    cop = cop.fillna("NA")
    cop = cop.replace(r'\\n', ' ', regex=True)
    cop = cop.replace(r'\n', ' ', regex=True)
    cop = cop.replace(r'\\r', ' ', regex=True)
    cop = cop.replace(r'\r', ' ', regex=True)
    cop = cop.replace(r'\\t', ' ', regex=True)
    cop = cop.replace(r'\t', ' ', regex=True)

    tempFolder = os.path.join(root_dir, "data/temp")
    tempFile = os.path.join(tempFolder, "tweets")
    outputFile = os.path.join(tempFolder, "tweets0_out.txt")

    cop.to_csv(tempFile, index=None, header=None)

    if os.path.isfile(outputFile):
        os.remove(outputFile)

    # Run the SentiStrength jar over the temp file; it writes tweets0_out.txt
    sentiFolder = os.path.join(root_dir, "utils")
    command = "java -jar {} sentidata {} input {}".format(
        os.path.join(sentiFolder, "SentiStrength.jar"),
        os.path.join(sentiFolder, "SentiStrength_Data/"), tempFile)
    print(command)
    process = Popen(command.split())
    process.wait()

    os.remove(tempFile)

    aa = pd.read_csv(outputFile, sep="\t")
    df = df.join(aa[['Positive', 'Negative']])
    os.remove(outputFile)

    # Collapse the two scores into a single signed total
    df['pos_neg'] = df['Positive'] + df['Negative']
    df = df.drop(['Positive', 'Negative'], axis=1)

    return df
def add_keyword(df, drop_non_existing=False):
    '''
    Adds keyword to the df

    Parameters:
    ___________
    df (Dataframe): The dataframe
    drop_non_existing (Boolean): To drop non existing or not
    '''
    # do cascading here too and include the entire cascade if more than 5% of
    # values contain a keyword; the cascade then contains that keyword
    current = get_root_dir()
    path = os.path.join(current, "keywords.csv")

    keywords = pd.read_csv(path)
    keywords = keywords.set_index('Symbol')

    symbol_keyword = {}
    for idx, row in keywords.iterrows():
        currKeywords = [x.strip().lower() for x in row['Keywords'].split(',')]
        symbol_keyword[idx] = currKeywords

    # maybe a crypto keyword? Bitcoin should be last
    def find_which(x):
        x = x.lower()
        nonlocal symbol_keyword

        matches = []
        for idx, row in symbol_keyword.items():
            for keyword in row:
                if keyword in x:
                    matches.append(idx)
        matches = list(set(matches))

        if len(matches) == 1:
            return matches[0]
        elif len(matches) == 2:
            if 'BTC' in matches:
                req_index = 1 - matches.index('BTC')
                return matches[req_index]

        return "invalid"

    df['keyword'] = df['Tweet'].swifter.apply(find_which)
    return df
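# Self-contained rerun of the tie-break rule above with a made-up keyword map
# (the real entries live in keywords.csv); for illustration only.
def _example_find_which():
    symbol_keyword = {
        'BTC': ['bitcoin', 'btc'],
        'ETH': ['ethereum', 'eth'],
        'LTC': ['litecoin', 'ltc'],
    }

    def find_which(x):
        x = x.lower()
        matches = list({s for s, kws in symbol_keyword.items()
                        if any(k in x for k in kws)})
        if len(matches) == 1:
            return matches[0]
        if len(matches) == 2 and 'BTC' in matches:
            return matches[1 - matches.index('BTC')]  # the non-BTC coin wins the tie
        return "invalid"

    assert find_which("Ethereum will flip Bitcoin") == 'ETH'
    assert find_which("just bought some litecoin") == 'LTC'
    assert find_which("btc eth ltc to the moon") == "invalid"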
def add_influence_and_all(df):
    d = df.groupby('cascade').apply(get_influence)
    d = d.drop_duplicates()

    # Log cascades whose influence could not be computed, then drop them
    invalid = d[d['inf'] == 'invalid']
    invalid.to_csv(get_root_dir() + "/data/storage/invalid.csv", mode='a')
    d = d[d['inf'] != 'invalid']

    df = df.merge(d, on='ID')
    df = df.drop('cascade_y', axis=1).rename(columns={'cascade_x': 'cascade'})

    new_inf = df.groupby('user_id').apply(get_influence_metrics)
    new_inf = new_inf.reset_index().rename(columns={'user_id': 'username'})

    return new_inf
def price_stream():
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')

    coin_dir = get_root_dir() + "/data/price"
    if not os.path.isdir(coin_dir):
        os.makedirs(coin_dir)

    for idx, row in keywords.iterrows():
        start_timestamp = 1561939200000  # 2019-07-01 00:00:00 UTC, in milliseconds
        exchange_name = row['exchange_name']
        pairname = row['pair_name'].replace('/', '')
        current_file = coin_dir + "/{}.csv".format(row['Symbol'])
        print(current_file)

        all_df = pd.DataFrame()

        if os.path.isfile(current_file):
            all_df = pd.read_csv(current_file)
            if len(all_df) > 0:
                all_df['Time'] = pd.to_datetime(all_df['Time'])
                # Resume from the last stored candle: nanoseconds -> milliseconds
                start_timestamp = all_df['Time'].astype(int).iloc[-1] // 10**6
                start_timestamp = int(start_timestamp)

        if exchange_name == 'Binance':
            curr_df = get_binance_data(pairname, start_timestamp)
        elif exchange_name == 'Bitfinex':
            curr_df = get_bitfinex_data(pairname, start_timestamp)

        full_df = pd.concat([all_df, curr_df])
        full_df = clean_price(full_df)
        full_df.to_csv(current_file, index=None)
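# Reference for the millisecond arithmetic above: pandas timestamps count
# nanoseconds since the epoch, while the exchange APIs here take milliseconds.
# The example date is arbitrary and matches the default start_timestamp.
def _example_ms_timestamp():
    t = pd.to_datetime('2019-07-01 00:00:00')
    return int(t.value // 10**6)  # 1561939200000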
def tar_and_remove(files):
    archive_dir = get_root_dir() + "/data/archive"
    if not os.path.isdir(archive_dir):
        os.makedirs(archive_dir)

    tar = tarfile.open(archive_dir + "/{}.tar.gz".format(int(time.time())),
                       "w:gz", compresslevel=5)
    for name in files:
        tar.add(name)
    tar.close()

    for file in files:
        os.remove(file)
def get_price(symbol, duration='30Min'):
    '''
    Returns the full price history for `symbol`, resampled into
    `duration`-sized candles. Downloads the data first if it is missing.
    '''
    dir = get_root_dir()
    fname = dir + "/data/price/{}.csv".format(symbol)

    if not os.path.isfile(fname):
        print('Price data has not been downloaded. Starting Download. This might take some time')
        from price_stream import price_stream
        price_stream()

    df = pd.read_csv(fname)
    df['Time'] = pd.to_datetime(df['Time'])

    price_df = df.groupby(pd.Grouper(key='Time', freq=duration, label='right')).apply(merge_time)
    price_df = price_df.reset_index()

    return price_df
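# merge_time is defined elsewhere in the repo. As orientation only, a sketch of
# a conventional OHLCV bucket-merger using the Open/High/Low/Close/Volume schema
# seen above; the actual aggregation may differ, and empty buckets would need
# handling.
def _merge_time_sketch(group):
    return pd.Series({
        'Open': group['Open'].iloc[0],     # first price in the bucket
        'High': group['High'].max(),
        'Low': group['Low'].min(),
        'Close': group['Close'].iloc[-1],  # last price in the bucket
        'Volume': group['Volume'].sum(),
    })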
def rescrape_and_add(original, to_scrape):
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    print("Rescraping {} tweets".format(len(to_scrape)))

    # statuses_lookup accepts at most 100 ids per call
    for i in range(100, len(to_scrape) + 100, 100):
        print("{} {}".format(i - 100, i))
        tweets = api.statuses_lookup(list(to_scrape['index'][i - 100:i].values),
                                     tweet_mode='extended')

        for tweet in tweets:
            response_type = 'tweet'
            in_response_to = None
            in_response_to_user_id = None

            try:
                in_response_to = tweet.in_reply_to_status_id
                in_response_to_user_id = tweet.in_reply_to_user_id_str
            except:
                pass

            if in_response_to == None:
                if hasattr(tweet, 'retweeted_status'):
                    response_type = 'retweet'
                    in_response_to = tweet.retweeted_status.id
                    in_response_to_user_id = tweet.retweeted_status.user._json['id_str']  # probably not required
                elif hasattr(tweet, 'quoted_status'):
                    response_type = 'quoted_retweet'
                    in_response_to = tweet.quoted_status.id
                    in_response_to_user_id = tweet.quoted_status.user._json['id_str']  # probably not required
                else:
                    in_response_to = '0'
            else:
                response_type = 'reply'

            # Prefer the extended (280-char) text where available
            tweetText = ''
            try:
                tweetText = tweetText + tweet.extended_tweet['full_text']
            except:
                try:
                    tweetText = tweetText + tweet.full_text
                except:
                    pass

            try:
                tweetText = (tweetText + ' <retweeted_status> ' +
                             tweet.retweeted_status.extended_tweet['full_text'] +
                             ' </retweeted_status>')
            except:
                try:
                    tweetText = (tweetText + ' <retweeted_status> ' +
                                 tweet.retweeted_status.text + ' </retweeted_status>')
                except:
                    pass

            try:
                tweetText = (tweetText + ' <quoted_status> ' +
                             tweet.quoted_status.extended_tweet['full_text'] +
                             ' </quoted_status>')
            except:
                try:
                    tweetText = (tweetText + ' <quoted_status> ' +
                                 tweet.quoted_status.text + ' </quoted_status>')
                except:
                    pass

            # Replace t.co short links with their expanded urls
            if 'urls' in tweet.entities:
                for url in tweet.entities['urls']:
                    try:
                        tweetText = tweetText.replace(url['url'], url['expanded_url'])
                    except:
                        pass

            write_csv([
                tweet.created_at, tweet.id, tweetText, tweet.favorite_count,
                tweet.retweet_count, tweet.user.screen_name,
                tweet.user._json['id_str'], tweet.user._json['created_at'],
                in_response_to, in_response_to_user_id, response_type,
                tweet.user.geo_enabled, tweet.user.verified,
                tweet.user.statuses_count, tweet.user.followers_count,
                tweet.user.friends_count, tweet.user.favourites_count,
                tweet.user.listed_count, tweet.user.profile_use_background_image,
                tweet.user.protected, tweet.user.default_profile
            ])

    rescraped = pd.read_csv(get_root_dir() + '/data/temp/rescraped.csv')
    profile = pd.read_csv(os.path.join(get_root_dir(), 'data/cleaned_profile.csv'))

    original['Time'] = pd.to_datetime(original['Time'])
    original['Time'] = original['Time'].astype(int) // 10**9  # ns -> unix seconds

    rescraped_df, rescraped_profile = processor(rescraped)

    # Tweets that could not be rescraped (deleted/suspended) get virtual stand-ins
    non_existing = to_scrape[~to_scrape['index'].isin(rescraped['id'])]
    virtual_tweets = non_existing['index'].apply(make_tweets, original=original)

    rescraped = pd.concat([virtual_tweets, rescraped_df]).reset_index(drop=True)

    virtual_tweets['User'] = virtual_tweets['User'].str.lower()
    rescrape = virtual_tweets[~virtual_tweets['User'].isin(profile['username'])]

    ps = profileScraper()
    scraped = ps.query_profile(rescrape['User'].values)
    scraped_profile = process_scraped_profile(scraped)

    # clean them separately before concatenating
    new_profile = pd.concat([scraped_profile, rescraped_profile, profile])
    new_profile = new_profile.drop_duplicates(subset=['username']).reset_index(drop=True)
    new_profile.to_csv(os.path.join(get_root_dir(), 'data/cleaned_profile.csv'), index=None)

    rescraped = get_sentiment(rescraped)

    new_df = pd.concat([original, rescraped])
    new_df = new_df.sort_values('Time')

    return new_df, new_profile
def get_json_info(symbol):
    # parameters.json is shared across coins, so `symbol` is currently unused
    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)
    return json_info
if __name__ == "__main__":
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')

    for idx, row in keywords.iterrows():
        perform_backtest(row['Symbol'],
                         n_fast_par=n_fast_par,
                         n_slow_par=n_slow_par,
                         long_macd_threshold_par=long_macd_threshold_par,
                         long_per_threshold_par=long_per_threshold_par,
                         long_close_threshold_par=long_close_threshold_par,
                         short_macd_threshold_par=short_macd_threshold_par,
                         short_per_threshold_par=short_per_threshold_par,
                         short_close_threshold_par=short_close_threshold_par,
                         initial_cash=initial_cash,
                         comission=comission)
def get_keywords():
    keywords = pd.read_csv(get_root_dir() + '/keywords.csv')
    return keywords
def perform_backtest(symbol_par, n_fast_par, n_slow_par, long_macd_threshold_par,
                     long_per_threshold_par, long_close_threshold_par,
                     short_macd_threshold_par, short_per_threshold_par,
                     short_close_threshold_par, initial_cash=10000,
                     comission=0.1, df=None):
    '''
    Parameters:
    __________
    symbol_par (string): The symbol to use
    n_fast_par (int): Fast EMA line used during MACD calculation
    n_slow_par (int): Slow EMA line used during MACD calculation
    long_macd_threshold_par (int): Threshold of normalized MACD above which we might open a long position
    long_per_threshold_par (int): Percentage change over the last 2 hours below which we might open a long position
        (the 2-hour window itself might be made a parameter too)
    long_close_threshold_par (int): Threshold of normalized MACD below which we will close an opened long position
    short_macd_threshold_par (int): Threshold of normalized MACD below which we might open a short position
    short_per_threshold_par (int): Percentage change over the last 2 hours above which we might open a short position
    short_close_threshold_par (int): Threshold of normalized MACD above which we will close an opened short position
    initial_cash (int) (optional): The cash to start from. Defaults to 10,000
    comission (float) (optional): Commission in percent. Defaults to 0.1%, which is
        much higher than typical fees; staying on the safe side
    df (DataFrame) (optional): Uses df as the features dataframe if specified.
        Otherwise reads the coin folder
    '''
    global n_fast
    global n_slow
    global long_macd_threshold
    global long_per_threshold
    global long_close_threshold
    global short_macd_threshold
    global short_per_threshold
    global short_close_threshold
    global symbol

    n_fast = n_fast_par
    n_slow = n_slow_par
    long_macd_threshold = long_macd_threshold_par
    long_per_threshold = long_per_threshold_par
    long_close_threshold = long_close_threshold_par
    short_macd_threshold = short_macd_threshold_par
    short_per_threshold = short_per_threshold_par
    short_close_threshold = short_close_threshold_par
    symbol = symbol_par

    json_info = {}
    json_info['n_fast'] = n_fast
    json_info['n_slow'] = n_slow
    json_info['long_macd_threshold'] = long_macd_threshold_par
    json_info['long_per_threshold'] = long_per_threshold_par
    json_info['long_close_threshold'] = long_close_threshold_par
    json_info['short_macd_threshold'] = short_macd_threshold_par
    json_info['short_per_threshold'] = short_per_threshold_par
    json_info['short_close_threshold'] = short_close_threshold_par
    json_info['initial_cash'] = initial_cash
    json_info['comission'] = comission

    with open(get_root_dir() + "/data/parameters.json", 'w') as f:
        json.dump(json_info, f)

    features_file = get_root_dir() + "/data/features/{}.csv".format(symbol)

    if df is None:
        df = pd.read_csv(features_file)

    # MACD over the sentiment signal, then z-score normalization
    df['macd'] = ta.trend.macd(df['sentistrength_total'], n_fast=n_fast,
                               n_slow=n_slow, fillna=True)
    df['macd'] = df['macd'].fillna(0)
    df['Time'] = pd.to_datetime(df['Time'])

    json_data = {}
    json_data['mean'] = df['macd'].mean()
    json_data['std'] = df['macd'].std()
    df['macd'] = (df['macd'] - json_data['mean']) / json_data['std']

    df = df.dropna(subset=['Time'])

    curr_dir = get_root_dir() + "/data/backtest/{}".format(symbol)
    if not os.path.exists(curr_dir):
        os.makedirs(curr_dir)

    fig = create_plot(df, 'macd', 'SentiStrength')
    plotly_json = fig.to_json()
    html = fig.to_html()

    with open(curr_dir + '/plotly.html', 'w') as file:
        file.write(html)

    with open(curr_dir + '/plotly.json', 'w') as file:
        file.write(plotly_json)

    df = df[['Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'macd']]
    df.to_csv(os.path.join(curr_dir, "data.csv"), index=None)

    data = PandasData_Custom(dataname=df)

    cerebro = bt.Cerebro(cheat_on_open=True, maxcpus=None)
    cerebro.adddata(data)
    cerebro.addstrategy(tradeStrategy)
    cerebro.addanalyzer(bt.analyzers.SharpeRatio_A)
    cerebro.addanalyzer(bt.analyzers.Calmar)
    cerebro.addanalyzer(bt.analyzers.DrawDown)
    cerebro.addanalyzer(bt.analyzers.Returns)
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer)
    cerebro.broker.setcash(initial_cash)
    cerebro.broker.setcommission(comission / 100)

    run = cerebro.run()

    analysis = run[0].analyzers.getbyname('tradeanalyzer').get_analysis()
    trade_analyzer = {}
    trade_analyzer['total'] = analysis['total']['total']
    trade_analyzer['open'] = analysis['total']['open']
    trade_analyzer['closed'] = analysis['total']['closed']

    with open(curr_dir + "/stats.json", 'w') as f:
        json.dump(trade_analyzer, f)

    portfolioValue, trades, operations = run[0].get_logs()

    # fig = cerebro.plot()
    # figure = fig[0][0]
    # figure.savefig(curr_dir + "/backtest.png")

    output_file(curr_dir + "/backtest.html")
    b = Bokeh(style='bar', plot_mode="tabs", scheme=Tradimo())
    b.plot_result(run)

    df = df.set_index('Time')
    df = df.resample('1D').apply(resampler)
    df = df.reset_index()
    df = df[['Time', 'Open']].merge(portfolioValue, left_on='Time',
                                    right_on='Date').drop('Time', axis=1)

    # Buy-and-hold benchmark: all initial cash put into the coin on day one
    df['hodl'] = (initial_cash / df['Open'].iloc[0]) * df['Open']
    df = df.drop('Open', axis=1)

    df.to_csv(curr_dir + '/portfolio.csv', index=None)
    trades.to_csv(curr_dir + '/trades.csv', index=None)
    operations.to_csv(curr_dir + '/operations.csv', index=None)

    with open(os.path.join(curr_dir, "data.json"), 'w') as fp:
        json.dump(json_data, fp)
def get_features(tweet_df, price_df, coin_name, curr_start, curr_end, minutes=30):
    '''
    Parameters:
    ___________
    tweet_df (DataFrame): Dataframe of tweets for the current coin
    price_df (DataFrame): Dataframe of price of the current coin
    coin_name (string): Name of coin
    curr_start (Timestamp): Starting time of the current all_cleaned
    curr_end (Timestamp): Ending time of the current all_cleaned
    '''
    features_dir = get_root_dir() + "/data/features"
    if not os.path.isdir(features_dir):
        os.makedirs(features_dir)

    features_file = features_dir + '/{}.csv'.format(coin_name)

    userwise_inf_file = os.path.join(get_root_dir(), 'data/userwise_influence.csv')
    userwise_inf = pd.read_csv(userwise_inf_file)

    tweet_df['Time'] = pd.to_datetime(tweet_df['Time'])
    tweet_df = tweet_df.sort_values('Time')

    tweet_df = tweet_df.merge(
        userwise_inf[['username', 'avg_influence', 'total_influence']],
        left_on='User', right_on='username', how='left')

    # Half the average if that user does not exist; this number goes down
    # as our dataset grows
    tweet_df['avg_influence'] = tweet_df['avg_influence'].fillna(2)
    tweet_df['total_influence'] = tweet_df['total_influence'].fillna(6)  # half the average, for the same reason

    price_df = price_df[(price_df['Time'] >= curr_start)
                        & (price_df['Time'] <= curr_end)].reset_index(drop=True)

    features = tweet_df.groupby('Time').apply(tweets_to_features)
    features = price_df.merge(features, how='left', on='Time')
    features = features.fillna(0)

    if os.path.isfile(features_file):
        features = pd.concat([pd.read_csv(features_file), features])

    features['Time'] = pd.to_datetime(features['Time'])
    features = features.sort_values('Time')
    features = features.drop_duplicates('Time', keep='last')
    features.to_csv(features_file, index=None)

    return features
if __name__ == "__main__":
    twitterThread = Thread(target=twitter_stream)
    twitterThread.start()
    print('Started Live Tweet Collection')

    price_stream()  # Streaming price first time
    print('Started Live Price Collection')

    dir = get_root_dir()
    temp_dir = os.path.join(dir, 'data/temp')
    if not os.path.isdir(temp_dir):
        os.makedirs(temp_dir)

    # Each periodic task gets its own stop flag
    priceFlag = Event()
    thread = MyThread(priceFlag, price_stream, 30)
    thread.start()

    oneFlag = Event()
    oneThread = MyThread(oneFlag, one_minute_cleaning, 60)
    oneThread.start()

    tenFlag = Event()
    tenThread = MyThread(tenFlag, ten_minute_profile, 60)
    tenThread.start()

    tenBacktestFlag = Event()
    tenBacktestThread = MyThread(tenBacktestFlag, ten_minutes_backtest, 60)
    tenBacktestThread.start()
def index(request):
    files = glob('algorithm/data/backtest/*')

    coinwise_stats = {}
    combined_portfolio = pd.DataFrame()

    btc = pd.read_csv('algorithm/data/backtest/BTC/portfolio.csv')
    combined_portfolio['Date'] = btc['Date']

    for file in files:
        symbol = file.split('/')[-1].replace('.csv', '')
        curr_df, coinwise_stats[symbol] = get_stats(symbol)
        combined_portfolio = curr_df.merge(combined_portfolio, on='Date', how='right')

    # Scale the BTC hodl benchmark to the same total starting cash as the
    # multi-coin portfolio (one initial_cash per coin column)
    combined_portfolio['btc_portfolio'] = btc['hodl']
    combined_portfolio['btc_portfolio'] = combined_portfolio['btc_portfolio'] * (len(combined_portfolio.columns) - 2)

    df = pd.DataFrame.from_dict(coinwise_stats)
    df = df.T

    combined_portfolio = combined_portfolio.set_index('Date')
    print(combined_portfolio.columns)

    # Percentage change of every column from first to last day
    div = pd.DataFrame(((combined_portfolio.iloc[-1] / combined_portfolio.iloc[0]) - 1) * 100).reset_index()
    div.columns = ['Symbol', 'Change']
    div['Change'] = div['Change'].round(2)
    div = div.sort_values('Change', ascending=False)

    combined_portfolio['portfolio'] = combined_portfolio.drop('btc_portfolio', axis=1).sum(axis=1)
    combined_portfolio = combined_portfolio[['portfolio', 'btc_portfolio']]
    combined_portfolio = combined_portfolio.reset_index()
    combined_portfolio = combined_portfolio.rename(columns={'Date': 'Time'})

    fig = create_plot(combined_portfolio, 'portfolio', 'Portfolio Movement',
                      'btc_portfolio', 'Bitcoin HODL Portfolio')
    html = fig.to_html()

    with open('interface/static/interface/plotly.html', 'w') as file:
        file.write(html)

    dictionary = dict(zip(div.Symbol, div.Change))
    print(dictionary)

    top_ten = ['TRX', 'OMG', 'MIOTA', 'ZEC', 'LTC', 'ETC', 'XTZ', 'BSV', 'SAN']

    btc['Date'] = pd.to_datetime(btc['Date'])
    top_df = df.loc[top_ten]

    forward_metrics = {}
    forward_metrics['Started From'] = btc.iloc[0]['Date'].strftime('%Y-%m-%d')
    forward_metrics['Traded for'] = str((btc.iloc[-1]['Date'] - btc.iloc[0]['Date']).days) + ' days'
    forward_metrics['Total Return'] = str(
        round((sum(df['end_cash']) / sum(df['start_cash']) - 1) * 100, 2)) + " %"
    forward_metrics['Total Return - Predetermined Coins'] = str(
        round((sum(top_df['end_cash']) / sum(top_df['start_cash']) - 1) * 100, 2)) + " %"
    forward_metrics['Return VS hodl all coins'] = str(
        round((sum(df['end_cash']) / sum(df['end_hodl']) - 1) * 100, 2)) + " %"
    # forward_metrics['Bitcoin hodl VS Portfolio'] = dictionary['btc_portfolio']

    del dictionary['btc_portfolio']

    symbols = get_symbols()

    # Features info calculation
    files = glob('algorithm/data/backtest/*')
    features_df = pd.DataFrame()
    position_df = pd.DataFrame(
        columns=['Symbol', 'Change', 'MACD', 'Position', 'Prediction', 'Move'])

    for file in files:
        symbol = file.split('/')[-1].replace('.csv', '')

        curr_features = get_features_stats(symbol)
        curr_features['Symbol'] = symbol
        features_df = features_df.append(pd.Series(curr_features), ignore_index=True)

        current_values, current_position, current_prediction, current_move = get_position_prediction_move(symbol)
        position_df = position_df.append(pd.Series({
            'Symbol': symbol,
            'Change': round(current_values['Change'], 2),
            'MACD': round(current_values['MACD'], 2),
            'Position': current_position,
            'Prediction': current_prediction,
            'Move': current_move
        }), ignore_index=True)

    with open(get_root_dir() + "/data/parameters.json") as f:
        json_info = json.load(f)

    # Move the Symbol column to the front
    features_df = features_df[['Symbol'] + list(features_df.columns[:-1])]
    features_df = features_df.sort_values('Current MACD').reset_index(drop=True)
    features_df = features_df.round(2)

    predictions_df = position_df[position_df['Prediction'] != "None"].reset_index(drop=True)
    position_df = position_df[position_df['Position'] != 'None'].reset_index(drop=True)

    return render(
        request, "interface/index.html", {
            'forward_metrics': forward_metrics,
            'current_parameters': json_info,
            'all_time_coinwise': dictionary,
            'symbols': symbols,
            'features': features_df.values.tolist(),
            'features_header': list(features_df.columns),
            'predictions': predictions_df.values.tolist(),
            'positions_predictions_headers': list(position_df.columns),
            'positions': position_df.values.tolist()
        })