def do_prepare(coin, specific_hour):
    """Build the hourly tweet/retweet/price data for one coin and store it.

    The steps run in this order:

    1. Read the coin's tweets from the DB, filter them, then pass them
       through ``TweetIO.sort_and_clip`` together with ``coin.loadtime``.
    2. Read the coin's retweets and the referenced users from the DB and
       inner-join the users onto the tweets.
    3. Run the sentiment analysis over the tweets, merge tweets with
       retweets and apply the sentiment/follower multiplication steps.
    4. Group retweets and tweets on an hourly basis.
    5. Collect the coin's price data and save the coin under ``PHASE``.

    Args:
        coin: Project coin object. ``coin.loadtime`` is read here; the
            attributes ``tweets``, ``retweets`` and ``pricehourly`` are
            (re)assigned, and ``coin.save_to_storeage`` is called.
        specific_hour: Forwarded unchanged to the DB readers and to
            ``BinanceApi.collect_coindata``; its type and meaning are
            defined by those callees.

    Returns:
        The same ``coin`` object that was passed in.

    Side effects:
        Changes process-wide pandas display options, prints progress
        counters to stdout and writes the coin via
        ``coin.save_to_storeage(PHASE, tmpdir='runtime/')``.
    """
    pd.set_option('display.max_rows', 99)
    # Use the full option key: pandas >= 1.4 also registers
    # 'styler.format.precision', so the bare pattern 'precision' matches
    # more than one option and set_option() raises OptionError.
    pd.set_option('display.precision', 10)
    pd.set_option('display.width', 1000)

    # --- Tweets from the DB ------------------------------------------------
    tweetio = TweetIO()
    df = tweetio.read_db_tweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    coin.tweets = df
    print("tweets from DB: ")
    print(len(df))

    # --- Filter and sort tweets --------------------------------------------
    # No Twitter connection is needed for filtering, hence no API handle.
    tweetcollector = TweetCollector(None)
    df = tweetcollector.filter_tweets(coin.tweets)
    df = tweetio.sort_and_clip(df, coin.loadtime)
    coin.tweets = df
    print("tweets>: ")
    print(len(df))

    # --- Retweets and referenced users -------------------------------------
    rdf = tweetio.read_db_retweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    coin.retweets = rdf
    print("retweets>: ")
    udf = tweetio.read_db_referenced_users(coin)
    print("Users>: ")
    print(len(udf))

    # --- Attach user data to the tweets ------------------------------------
    print("nr. of tweets before merge:")
    print(len(coin.tweets))
    # Inner join: tweets without a matching user row are dropped.
    coin.tweets = coin.tweets.merge(
        udf,
        left_on='user_history_row_id',
        right_on='user_row_id',
        how='inner',
    )
    print("nr. of tweets after merge:")
    print(len(coin.tweets))

    # --- Sentiment analysis ------------------------------------------------
    sid = SentimentAnalyzer()
    # Single-core variant; sid.paralellanalyse(coin.tweets) is the
    # alternative intended for multicore CPUs.
    dfsents = sid.analysedf(coin.tweets)
    print("coin.tweets ready.")
    coin.tweets = dfsents

    # --- Multiply (re)tweet followers by sentiment -------------------------
    print("multiplying nr. of retweet followers by sentiments.")
    sentanalyzer = SentimentAnalyzer()
    sentanalyzer.merge_tweets_with_retweets(coin)
    sentanalyzer.sent_mul_tweet_followers(coin)
    sentanalyzer.sent_mul_retweet_followers(coin)
    print(len(coin.retweets))

    # --- Hourly grouping ---------------------------------------------------
    print("grouping retweets by hour basis")
    sentanalyzer.group_retweet_by_hour(coin)
    print("grouping tweets by hour basis")
    sentanalyzer.group_tweet_by_hour(coin)

    # --- Summary counters --------------------------------------------------
    print("RETWEET S")
    print(len(coin.retweets))
    print("TWEETS")
    print(len(coin.tweets))
    print("USERS")
    print(len(udf))

    # --- Prices and persistence --------------------------------------------
    bapi = BinanceApi()
    coin_price = bapi.collect_coindata(coin, specific_hour)
    coin.pricehourly = coin_price
    coin.save_to_storeage(PHASE, tmpdir='runtime/')
    return coin
# Script: continue preparing the "ada" coin from the state stored under
# "prepare1".
#
# A disabled earlier variant picked the coin from
# CoinInfo().list_coins('./data/altcoin-1hour') (index 19) instead of
# building the Coin by hand as done below.

# --- Coin definition -------------------------------------------------------
coin = Coin()
coin.path = "./data/altcoin-1hour/ada.csv"
coin.name = "ada"
coin.ico = "2017-10-01"  # previously tried value: "2016-02-17"

# --- Restore stored state --------------------------------------------------
tweetio = TweetIO()
coin.read_from_storeage("prepare1")

# Show which columns the restored frames carry.
print(coin.tweets.columns)
print(coin.retweets.columns)

# --- Sort/clip the tweets using the ICO date -------------------------------
# `df` stays bound at module level, exactly like the tweets attribute.
df = coin.tweets = tweetio.sort_and_clip(coin.tweets, coin.ico)

# --- Multiply retweet followers by sentiment -------------------------------
print("multiplying nr. of retweet followers by sentiments.")
sentanalyzer = SentimentAnalyzer()
sentanalyzer.merge_tweets_with_retweets(coin)
sentanalyzer.sent_mul_tweet_followers(coin)
sentanalyzer.sent_mul_retweet_followers(coin)

# Quick sanity output: retweet count and the last few rows.
print(len(coin.retweets))
print(coin.retweets.tail())

# NOTE(review): the original ends with a "GROUPING RETWEETS BY HOUR" heading
# but no code follows it in this snippet.
# Script: load the already-scraped tweets of one coin, sort/clip them using
# the ICO date and plot the tweet statistics.

cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')

# Pick the coin at index 9 of the listing (not the first entry); the hashtag
# set below suggests this is meant to be EOS -- the printed name confirms it.
coin = coinlist[9]
print(coin.name)

# NOTE(review): the day is not zero-padded ("2017-07-1"); left unchanged
# because how sort_and_clip parses this value is not visible here -- confirm.
coin.ico = "2017-07-1"
coin.hashtags = ['eos']

tapi = TwitterApi()
tweetcollector = TweetCollector(tapi.get_api())
# Scraping is a one-off step and is intentionally disabled:
# tweetcollector.scrape_coin_tweets(coin)

tweetio = TweetIO()
df = tweetio.read_all_scraped_tweet(coin)
df = tweetio.sort_and_clip(df, coin.ico)
# Single plain assignment; the original assigned the same frame twice
# (once directly, once more via setattr with a constant attribute name).
coin.tweets = df
print(coin.tweets.tail())

stat = Statistics()
stat.plot_tweet_stat(coin)

# Follow-up collection steps, currently disabled:
# tweetcollector.collect_all_retweets(coin, tapi, 1, 99000)
# tweetcollector.collect_all_users(coin, tapi)