Example #1
    def test_tweetio_read_db2(self):
        coin = Coin()

        coin.name = 'omg'
        # use yesterday's date (YYYY-MM-DD) as the coin's load time
        yesterday = date.today() + timedelta(days=-1)
        coin.loadtime = yesterday.strftime("%Y-%m-%d")
        coin.hashtags = ['omg', 'omisego']

        # read the last n hours of tweets for this coin from the DB
        tio = TweetIO()
        df = tio.read_db_tweet_last_n_hour_by_coin(coin)
        print(df)
Example #2
    def test_tweetio_read_db3(self):
        tio = TweetIO()
        coin = Coin()
        coin.name = 'neo'
        nowdt = datetime.now()

        # read the last n hours of tweets relative to one hour ago
        dfnow1 = tio.read_db_tweet_last_n_hour_by_specific_hour_by_coin(
            coin, nowdt + timedelta(hours=-1))

        print("dfnow1:")
        print(dfnow1)

        print("test case finishes.")
Example #3
    def test_tweetio_read_db1(self):
        tio = TweetIO()
        coin = Coin()
        coin.name = 'neo'
        nowdt = datetime.now()

        # reading relative to the current hour should match the default read
        dfnow1 = tio.read_db_retweet_last_n_hour_by_specific_hour_by_coin(
            coin, nowdt)
        dfnow2 = tio.read_db_retweet_last_n_hour_by_coin(coin)
        print("dfnow1:")
        print(dfnow1)
        print("dfnow2:")
        print(dfnow2)

        print("equality check: ")
        self.assertTrue(dfnow1.equals(dfnow2))

        print("test case finishes.")
Example #4
    def test_tweetio_read_predict(self):

        pd.set_option("display.max_rows",100)
        pd.set_option("display.max_columns",100)

        tio = TweetIO()
        coin = Coin()
        coin.name = 'omg'
        nowdt = datetime.now()
        argtime='2018-04-14_16-00-00'

        coin.read_from_storeage('rt','runtime/'+argtime+'/')
        print(coin.data_to_predict)

        print("test case finishes.")
Example #5
from coins.coin import Coin
from twitter.tweetio import TweetIO
from twitter.sentiment import SentimentAnalyzer
import pandas as pd

print('Main starts')
#cinfo=CoinInfo()
#coinlist=cinfo.list_coins('./data/altcoin-1hour')

## building the coin by hand instead of picking it from the list
#coin=coinlist[19]
coin = Coin()
coin.path = "./data/altcoin-1hour/ada.csv"
coin.name = "ada"
coin.ico = "2017-10-01"
#coin.ico="2016-02-17"

tweetio = TweetIO()
coin.read_from_storeage("prepare1")
print(coin.tweets.columns)
print(coin.retweets.columns)
df = tweetio.sort_and_clip(coin.tweets, coin.ico)
coin.tweets = df

## MULTIPLYING RETWEETS FOLLOWERS

print("multiplying nr. of retweet followers by sentiments.")
sentanalyzer = SentimentAnalyzer()
sentanalyzer.merge_tweets_with_retweets(coin)
#print(coin.tweets)
sentanalyzer.sent_mul_tweet_followers(coin)
sentanalyzer.sent_mul_retweet_followers(coin)
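For orientation, sort_and_clip is used throughout these examples to order the tweets chronologically and drop rows that predate the coin's ICO. A minimal pandas sketch of that idea; the 'created_at' column name and the exact cut-off semantics are assumptions, not the project's implementation:

import pandas as pd

def sort_and_clip_sketch(df, ico, time_col='created_at'):
    # illustrative only: sort by time and keep rows at or after the ICO date
    out = df.copy()
    out[time_col] = pd.to_datetime(out[time_col])
    out = out.sort_values(time_col)
    return out[out[time_col] >= pd.to_datetime(ico)].reset_index(drop=True)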
Example #6
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt

from twitter.tweetio import TweetIO

print('Main starts plotting')
cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')

## picking one coin from the list
coin = coinlist[18]
coin.ico = "2016-05-01"

tweetio = TweetIO()
coin.read_from_storeage("prepare2")

df_ohlc = coin.pricehourly.copy()
df_ohlc = df_ohlc[['datetime', 'open', 'high', 'low', 'close']]
#df_ohlc=df_ohlc.drop(['time1','volumefrom','volumeto'],axis=1)
fromperiod = '2017-08-01'
toperiod = '2017-08-25'
df_ohlc = df_ohlc[(df_ohlc['datetime'] >= fromperiod)
                  & (df_ohlc['datetime'] < toperiod)]

# max_datetime is an epoch integer (milliseconds, given the 1e6 scaling below);
# keep the raw value and convert to pandas datetimes, which read bare integers as nanoseconds
coin.gtdf['max_datetime_epoch'] = coin.gtdf['max_datetime']
coin.gtdf['max_datetime'] = pd.to_datetime(
    coin.gtdf['max_datetime'].astype('int') * int(1e6))
coin.grtdf['max_datetime_epoch'] = coin.grtdf['max_datetime']
coin.grtdf['max_datetime'] = pd.to_datetime(
    coin.grtdf['max_datetime'].astype('int') * int(1e6))
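The conversion above works because pd.to_datetime treats plain integers as nanoseconds since the Unix epoch, so millisecond values have to be scaled by 1e6 first. A tiny standalone check with made-up timestamps:

import pandas as pd

ms_epoch = pd.Series([1503532800000, 1503536400000])  # made-up millisecond epochs

# scale ms -> ns because to_datetime reads bare integers as nanoseconds
print(pd.to_datetime(ms_epoch * int(1e6)))

# equivalent and more explicit: name the unit
print(pd.to_datetime(ms_epoch, unit='ms'))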
Example #7
def do_prepare(coin, specific_hour):

    pd.set_option('display.max_rows', 99)
    pd.set_option('precision', 10)
    pd.set_option('display.width', 1000)

    #Getting Tweets from DB
    tweetio = TweetIO()
    df = tweetio.read_db_tweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    coin.tweets = df
    print("tweets from DB: ")
    print(len(df))

    # filter and sort tweets
    # tapi = TwitterApi()  # no Twitter API connection is needed here
    tweetcollector = TweetCollector(None)
    df = tweetcollector.filter_tweets(coin.tweets)
    df = tweetio.sort_and_clip(df, coin.loadtime)
    coin.tweets = df
    print("tweets>: ")
    print(len(df))

    #Collect retweets, users
    rdf = tweetio.read_db_retweet_last_n_hour_by_specific_hour_by_coin(
        coin, specific_hour)
    setattr(coin, 'retweets', rdf)

    print("retweets>: ")
    #print(rdf)
    udf = tweetio.read_db_referenced_users(coin)
    print("Users>: ")
    print(len(udf))

    #tweetcollector.collect_all_users(coin, tapi, tmpdir=tmpd)

    ##PREPARE 1
    #df = tweetio.read_all_scraped_retweet(coin, tmpd)

    ## MERGING TWEET FOLLOWERS
    #tweetio.read_users_for_tweets(coin, tmpd)
    print("nr. of tweets before merge:")
    print(len(coin.tweets))
    coin.tweets = coin.tweets.merge(udf,
                                    left_on='user_history_row_id',
                                    right_on='user_row_id',
                                    how='inner')
    print("nr. of tweets after merge:")
    print(len(coin.tweets))

    sid = SentimentAnalyzer()
    # on a multicore CPU the parallel variant can be used instead:
    # dfsents = sid.paralellanalyse(coin.tweets)
    # single-core variant:
    dfsents = sid.analysedf(coin.tweets)
    # print(dfsents.head())
    print("coin.tweets ready.")

    setattr(coin, 'tweets', dfsents)

    # PREPARE 2

    #df = tweetio.sort_and_clip(coin.tweets, coin.ico)
    #coin.tweets = df

    ## MULTIPLYING RETWEETS FOLLOWERS

    print("multiplying nr. of retweet followers by sentiments.")
    sentanalyzer = SentimentAnalyzer()
    sentanalyzer.merge_tweets_with_retweets(coin)
    sentanalyzer.sent_mul_tweet_followers(coin)
    sentanalyzer.sent_mul_retweet_followers(coin)

    print(len(coin.retweets))
    #print(coin.retweets.head())

    ## GROUPING RETWEETS BY HOUR

    print("grouping retweets by hour basis")
    sentanalyzer.group_retweet_by_hour(coin)
    #print(coin.grtdf.head())

    print("grouping tweets by hour basis")
    sentanalyzer.group_tweet_by_hour(coin)
    #print(coin.gtdf.head())

    print("RETWEET S")
    #print(coin.retweets)
    print(len(coin.retweets))

    print("TWEETS")
    #print(coin.tweets)
    print(len(coin.tweets))

    print("USERS")
    #print(udf)
    print(len(udf))

    ## Fetch hourly price data from Binance for this coin
    bapi = BinanceApi()
    coin_price = bapi.collect_coindata(coin, specific_hour)
    setattr(coin, 'pricehourly', coin_price)

    coin.save_to_storeage(PHASE, tmpdir='runtime/')

    return coin
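A minimal driver for do_prepare, assuming the same Coin setup used in the test cases above (the coin name, hashtags and the chosen hour are illustrative):

from datetime import date, datetime, timedelta

from coins.coin import Coin

# configure a coin the same way the test cases above do
coin = Coin()
coin.name = 'omg'
coin.hashtags = ['omg', 'omisego']
coin.loadtime = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")

# prepare data relative to the previous full hour
coin = do_prepare(coin, datetime.now() + timedelta(hours=-1))
print(len(coin.tweets), len(coin.retweets))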
Example #8
cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')

## picking the eos coin from the list
coin = coinlist[9]
print(coin.name)

coin.ico = "2017-07-01"
coin.hashtags = ['eos']

tapi = TwitterApi()

tweetcollector = TweetCollector(tapi.get_api())
# scraping is only needed once, so it stays commented out here:
#tweetcollector.scrape_coin_tweets(coin)

tweetio = TweetIO()
df = tweetio.read_all_scraped_tweet(coin)
df = tweetio.sort_and_clip(df, coin.ico)
coin.tweets = df

print(coin.tweets.tail())

stat = Statistics()
stat.plot_tweet_stat(coin)

#tweetcollector.collect_all_retweets(coin,tapi,1,99000)
#tweetcollector.collect_all_users(coin,tapi)

Example #9
from coins.coin import Coin
from twitter.tweetio import TweetIO
from twitter.sentiment import SentimentAnalyzer
from twitter.tweepy import TwitterApi
from twitter.tweetcollector import TweetCollector

print('Main starts')
cinfo = CoinInfo()
coinlist = cinfo.list_coins('./data/altcoin-1hour')

## building the ada coin by hand
coin = Coin()
coin.name = "ada"
coin.ico = "2017-10-01"
#coin.ico="2016-02-17"

tweetio = TweetIO()
print("read already scraped  retweets:")
df = tweetio.read_all_scraped_retweet(coin)
setattr(coin, 'retweets', df)
print("coin.retweets.head()")
print(coin.retweets.tail())
print("retweets done...")

tapi = TwitterApi()
tweetcollector = TweetCollector(tapi)
print("read already scraped tweets:")
df = tweetio.read_all_scraped_tweet(coin)
print("before filter: ", len(df.index))
df = tweetcollector.filter_tweets(df)
print("after filter: ", len(df.index))
print(df.columns)