def test_tweetio_read_db2(self):
    """Read the last-n-hours tweets for a coin whose loadtime is yesterday.

    Smoke test only: prints the resulting DataFrame, asserts nothing.
    """
    target = Coin()
    target.name = 'omg'
    target.hashtags = ['omg', 'omisego']
    # loadtime is yesterday's date, formatted the way the DB layer expects.
    yesterday = date.today() - timedelta(days=1)
    target.loadtime = yesterday.strftime("%Y-%m-%d")
    frame = TweetIO().read_db_tweet_last_n_hour_by_coin(target)
    print(frame)
def test_tweetio_read_db3(self):
    """Read tweets for a specific hour (one hour ago) and print them.

    Smoke test only: prints the resulting DataFrame, asserts nothing.
    """
    reader = TweetIO()
    target = Coin()
    target.name = 'neo'
    one_hour_ago = datetime.now() - timedelta(hours=1)
    frame = reader.read_db_tweet_last_n_hour_by_specific_hour_by_coin(
        target, one_hour_ago)
    print("dfnow1:")
    print(frame)
    print("test case finishes.")
def test_tweetio_read_db1(self):
    """Check that the specific-hour retweet query at 'now' matches the
    implicit-now retweet query for the same coin."""
    io_helper = TweetIO()
    target = Coin()
    target.name = 'neo'
    moment = datetime.now()
    by_hour = io_helper.read_db_retweet_last_n_hour_by_specific_hour_by_coin(
        target, moment)
    by_coin = io_helper.read_db_retweet_last_n_hour_by_coin(target)
    print("dfnow1:")
    print(by_hour)
    print("dfnow2:")
    print(by_coin)
    print("equality check: ")
    # Both query paths must produce identical frames.
    self.assertTrue(by_hour.equals(by_coin))
    print("test case finishes.")
def test_tweetio_read_predict(self):
    """Load a coin's saved runtime snapshot and print its prediction frame.

    Smoke test only: prints ``coin.data_to_predict``, asserts nothing.
    Fix: removed the unused locals ``nowdt = datetime.now()`` and
    ``tio = TweetIO()`` — neither was referenced anywhere in the test.
    """
    # Widen pandas display so the printed frame is not elided.
    pd.set_option("display.max_rows", 100)
    pd.set_option("display.max_columns", 100)
    coin = Coin()
    coin.name = 'omg'
    # Timestamped snapshot directory produced by an earlier prepare run.
    argtime = '2018-04-14_16-00-00'
    coin.read_from_storeage('rt', 'runtime/' + argtime + '/')
    print(coin.data_to_predict)
    print("test case finishes.")
from coins.coin import Coin
# Fix: TweetIO and SentimentAnalyzer are used below but were never imported
# here; module paths taken from the sibling scripts in this project.
from twitter.tweetio import TweetIO
from twitter.sentiment import SentimentAnalyzer
import pandas as pd

print('Main starts')
#cinfo=CoinInfo()
#coinlist=cinfo.list_coins('./data/altcoin-1hour')
## choosing first one: neo
#coin=coinlist[19]

# Hard-wired target coin (ada) and its ICO date used as the clip boundary.
coin = Coin()
coin.path = "./data/altcoin-1hour/ada.csv"
coin.name = "ada"
coin.ico = "2017-10-01"
#coin.ico="2016-02-17"

tweetio = TweetIO()
# Load the intermediate "prepare1" snapshot (tweets + retweets).
coin.read_from_storeage("prepare1")
print(coin.tweets.columns)
print(coin.retweets.columns)

# Drop tweets older than the ICO date and sort chronologically.
df = tweetio.sort_and_clip(coin.tweets, coin.ico)
coin.tweets = df

## MULTIPLYING RETWEETS FOLLOWERS
print("multiplying nr. of retweet followers by sentiments.")
sentanalyzer = SentimentAnalyzer()
sentanalyzer.merge_tweets_with_retweets(coin)
#print(coin.tweets)
sentanalyzer.sent_mul_tweet_followers(coin)
sentanalyzer.sent_mul_retweet_followers(coin)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
# NOTE(review): matplotlib.finance was removed from matplotlib itself
# (candlestick_ohlc moved to the separate mpl_finance package) — this import
# only works on an old pinned matplotlib; confirm the environment.
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt

print('Main starts plotting')

# NOTE(review): CoinInfo and TweetIO are not imported in this visible chunk —
# presumably imported elsewhere in the file; verify.
cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')
## choosing first one: neo
coin = coinlist[18]
coin.ico = "2016-05-01"
tweetio = TweetIO()
# Load the "prepare2" snapshot (includes hourly prices and grouped tweet stats).
coin.read_from_storeage("prepare2")

# Build the OHLC frame for candlestick plotting, restricted to one period.
df_ohlc = coin.pricehourly.copy()
df_ohlc = df_ohlc[['datetime', 'open', 'high', 'low', 'close']]
#df_ohlc=df_ohlc.drop(['time1','volumefrom','volumeto'],axis=1)
fromperiod = '2017-08-01'
toperiod = '2017-08-25'
df_ohlc = df_ohlc[(df_ohlc['datetime'] >= fromperiod)
                  & (df_ohlc['datetime'] < toperiod)]

# Keep the raw epoch value alongside a converted datetime column.
# The *1e6 multiplier scales to pandas nanosecond timestamps — presumably the
# stored epoch is in milliseconds; TODO confirm against the grouping code.
coin.gtdf['max_datetime_epoch'] = coin.gtdf['max_datetime']
coin.gtdf['max_datetime'] = pd.to_datetime(
    coin.gtdf['max_datetime'].astype('int') * int(1e6))
coin.grtdf['max_datetime_epoch'] = coin.grtdf['max_datetime']
# NOTE(review): the source chunk is truncated mid-statement below.
coin.grtdf['max_datetime'] = pd.to_datetime(
def do_prepare(coin, specific_hour):
    """Run the full data-preparation pipeline for one coin at a given hour.

    Reads tweets/retweets/users from the DB, filters and sentiment-scores the
    tweets, weights sentiments by follower counts, groups by hour, attaches
    hourly Binance prices, then persists the coin and returns it.

    Parameters:
        coin: project Coin object; mutated in place (tweets, retweets,
            pricehourly, grouped frames are all set on it).
        specific_hour: datetime anchoring the "last n hours" DB queries.

    Returns:
        The same (mutated) coin object.

    NOTE(review): relies on module-level names PHASE (snapshot phase label for
    save_to_storeage) and the imported IO/analyzer classes — defined outside
    this chunk; verify before moving this function.
    """
    # Widen pandas display for the diagnostic prints below.
    pd.set_option('display.max_rows', 99)
    pd.set_option('precision', 10)
    pd.set_option('display.width', 1000)
    #Getting Tweets from DB
    tweetio = TweetIO()
    df = tweetio.read_db_tweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    coin.tweets = df
    print("tweets from DB: ")
    print(len(df))
    #Filter and sort tweets
    #tapi = TwitterApi() we dont need twitter connection:
    # TweetCollector is constructed without an API handle — only its offline
    # filtering is used here, no Twitter connection is made.
    tweetcollector = TweetCollector(None)
    df = tweetcollector.filter_tweets(coin.tweets)
    df = tweetio.sort_and_clip(df, coin.loadtime)
    coin.tweets = df
    print("tweets>: ")
    print(len(df))
    #Collect retweets, users
    rdf = tweetio.read_db_retweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    setattr(coin, 'retweets', rdf)
    print("retweets>: ")
    #print(rdf)
    udf = tweetio.read_db_referenced_users(coin)
    print("Users>: ")
    print(len(udf))
    #tweetcollector.collect_all_users(coin, tapi, tmpdir=tmpd)
    ##PREPARE 1
    #df = tweetio.read_all_scraped_retweet(coin, tmpd)
    ## MERGING TWEET FOLLOWERS
    #tweetio.read_users_for_tweets(coin, tmpd)
    print("nr. of tweets before merge:")
    print(len(coin.tweets))
    # Inner join: tweets without a matching user row are dropped here.
    coin.tweets = coin.tweets.merge(udf, left_on='user_history_row_id',
                                    right_on='user_row_id', how='inner')
    print("nr. of tweets after merge:")
    print(len(coin.tweets))
    sid = SentimentAnalyzer()
    #only with multicore CPU:
    #dfsents = sid.paralellanalyse(coin.tweets)
    #this with singlecore CPU:
    dfsents = sid.analysedf(coin.tweets)
    # print(dfsents.head())
    print("coin.tweets ready.")
    setattr(coin, 'tweets', dfsents)
    # PREPARE 2
    #df = tweetio.sort_and_clip(coin.tweets, coin.ico)
    #coin.tweets = df
    ## MULTIPLYING RETWEETS FOLLOWERS
    print("multiplying nr. of retweet followers by sentiments.")
    sentanalyzer = SentimentAnalyzer()
    sentanalyzer.merge_tweets_with_retweets(coin)
    sentanalyzer.sent_mul_tweet_followers(coin)
    sentanalyzer.sent_mul_retweet_followers(coin)
    print(len(coin.retweets))
    #print(coin.retweets.head())
    ## GROUPING RETWEETS BY HOUR
    print("grouping retweets by hour basis")
    sentanalyzer.group_retweet_by_hour(coin)
    #print(coin.grtdf.head())
    print("grouping tweets by hour basis")
    sentanalyzer.group_tweet_by_hour(coin)
    #print(coin.gtdf.head())
    print("RETWEET S")
    #print(coin.retweets)
    print(len(coin.retweets))
    print("TWEETS")
    #print(coin.tweets)
    print(len(coin.tweets))
    print("USERS")
    #print(udf)
    print(len(udf))
    ## Setting in prices:
    bapi = BinanceApi()
    coin_price = bapi.collect_coindata(coin, specific_hour)
    setattr(coin, 'pricehourly', coin_price)
    coin.save_to_storeage(PHASE, tmpdir='runtime/')
    return coin
# Pick a coin from the local price-data directory and plot its tweet stats.
cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')
## choosing first one: xyz
coin = coinlist[9]
print(coin.name)
coin.ico = "2017-07-1"
coin.hashtags = ['eos']

tapi = TwitterApi()
tweetcollector = TweetCollector(tapi.get_api())
#do not scrape, just once
#tweetcollector.scrape_coin_tweets(coin)

tweetio = TweetIO()
df = tweetio.read_all_scraped_tweet(coin)
# Drop tweets older than the ICO date and sort chronologically.
df = tweetio.sort_and_clip(df, coin.ico)
# Fix: the original assigned coin.tweets twice in a row
# (plain assignment immediately followed by an equivalent setattr);
# the redundant duplicate is removed.
coin.tweets = df
print(coin.tweets.tail())

stat = Statistics()
stat.plot_tweet_stat(coin)
#tweetcollector.collect_all_retweets(coin,tapi,1,99000)
#tweetcollector.collect_all_users(coin,tapi)
# Fix: Coin is instantiated below but was never imported (path taken from the
# sibling script in this project).
from coins.coin import Coin
from twitter.tweetio import TweetIO
from twitter.sentiment import SentimentAnalyzer
from twitter.tweepy import TwitterApi
from twitter.tweetcollector import TweetCollector

print('Main starts')
# Fix: CoinInfo was never imported in this script and the resulting coinlist
# was unused, so the lookup is disabled rather than crash with a NameError.
#cinfo = CoinInfo()
#coinlist = cinfo.list_coins('./data/altcoin-1hour')
## choosing first one: neo

coin = Coin()
coin.name = "ada"
coin.ico = "2017-10-01"
#coin.ico="2016-02-17"

tweetio = TweetIO()
print("read already scraped retweets:")
df = tweetio.read_all_scraped_retweet(coin)
setattr(coin, 'retweets', df)
print("coin.retweets.head()")
print(coin.retweets.tail())
print("retweets done...")

tapi = TwitterApi()
tweetcollector = TweetCollector(tapi)
print("read already scraped tweets:")
df = tweetio.read_all_scraped_tweet(coin)
print("before filter: ", len(df.index))
# Apply the collector's offline tweet filter and report the size change.
df = tweetcollector.filter_tweets(df)
print("after filter: ", len(df.index))
print(df.columns)