예제 #1
0
    def test_database_overwrite(self):
        """Check how the database handles two entries sharing one timestamp.

        Four entries are added, two of which carry the same timestamp. The
        test expects three stored entries, and every stored entry at the
        duplicated timestamp must hold the value that was added last.
        """
        database = Database(':memory:')

        # (timestamp text, value) pairs in insertion order; the last pair
        # repeats the timestamp of the second one with a different value.
        samples = [
            ('2017-06-18 12:00', 230.0),
            ('2017-06-18 15:00', 2200.12),
            ('2017-06-18 21:00', 240.0),
            ('2017-06-18 15:00', 242.0),
        ]
        database.add('Test::ETH', [
            Entry(timestamp=Timestamp(when), value=value)
            for when, value in samples
        ])

        stored = database.get('Test::ETH')
        self.assertEqual(len(stored), 3)

        duplicated = [
            item for item in stored
            if item.timestamp == Timestamp('2017-06-18 15:00')
        ]
        for item in duplicated:
            self.assertEqual(item.value, 242.0)
예제 #2
0
def test_historical_prices():
    """Fetch hourly ETH price records and print a short summary.

    Prints the number of records returned by the client and, if there is at
    least one, the timestamps of the first and of the last record.

    NOTE(review): the second positional argument is presumably the
    'to' timestamp of the query -- confirm against the client API.
    """
    client = CryptoCompare()
    prices = client.get_historical_prices('ETH', Timestamp('2016-04-08 06:00'),
                                          Interval.hour)

    print(len(prices))

    if prices:
        # The guard only guarantees one element, so the first record must be
        # read at index 0; index 1 would raise IndexError for a one-record
        # result and would otherwise skip the actual first record.
        print(Timestamp(prices[0]['time']))
        print(Timestamp(prices[-1]['time']))
예제 #3
0
    def test_timestamp_create(self):
        """Check Timestamp construction from 'now', datetimes, floats and text.

        The assertions expect that a default-constructed Timestamp equals one
        built from the current UTC datetime (directly or via its POSIX
        value), that parsed text keeps its hour, and that timestamps within
        the same hour compare equal while a different hour or day does not.
        """
        # 'Now' has to be expressed in UTC regardless of how it is created.
        now_default = Timestamp()
        now_from_datetime = Timestamp(datetime.utcnow())
        now_from_epoch = Timestamp(datetime.utcnow().timestamp())

        for other in (now_from_datetime, now_from_epoch):
            self.assertEqual(now_default, other)

        on_the_hour = Timestamp('2017-04-21 14:00')
        half_past = Timestamp('2017-04-21 14:30')
        end_of_hour = Timestamp('2017-04-21 14:59')
        next_hour = Timestamp('2017-04-21 15:00')
        next_day = Timestamp('2017-04-22 14:30')

        # Parsing must not shift the date into another time zone.
        expected_hours = (
            (on_the_hour, 14),
            (half_past, 14),
            (end_of_hour, 14),
            (next_hour, 15),
            (next_day, 14),
        )
        for parsed, hour in expected_hours:
            self.assertEqual(parsed.timestamp.hour, hour)

        for same_slot in (half_past, end_of_hour):
            self.assertEqual(on_the_hour, same_slot)

        for other_slot in (next_hour, next_day):
            self.assertNotEqual(on_the_hour, other_slot)
예제 #4
0
def test_error():
    """Query prices for the symbol 'XYZ' and print the HTTPError message, if any."""
    client = CryptoCompare()

    try:
        # Arguments are built inside the try block, exactly where the
        # request itself is issued.
        request = ('XYZ', Timestamp('2016-04-08 06:00'), Interval.hour)
        client.get_historical_prices(*request)
    except HTTPError as error:
        print('ERROR:', error.message)
예제 #5
0
    def get(self, id):
        """Return all entries stored for the channel ``id``.

        Args:
            id: Identifier of the channel; it is also used as the name of the
                table that is queried.

        Returns:
            A list of ``Entry`` objects, one per row of the channel table,
            built from the first column (wrapped in ``Timestamp``) and the
            second column (used as value).

        Raises:
            AssertionError: If ``id`` equals ``Database.CHANNELS_ID`` or
                ``Database.CREDENTIALS_ID``, or if ``get_channel`` returns a
                falsy result for ``id``.
        """
        # Compare by value, not identity: an equal id held in a different
        # object (e.g. a string built at runtime) must be rejected as well.
        assert id != Database.CHANNELS_ID
        assert id != Database.CREDENTIALS_ID

        channel = self.get_channel(id)
        assert channel

        # NOTE(review): the table name is interpolated into the statement
        # (identifiers cannot be bound as SQL parameters); this presumably
        # relies on ids being registered channels only -- confirm.
        command = 'SELECT * FROM "{channel}"'.format(channel=id)
        rows = self.cursor.execute(command)

        return [
            Entry(timestamp=Timestamp(row[0]), value=row[1]) for row in rows
        ]
예제 #6
0
    def run(self, database, start, end, interval, log):
        """Search tweets for every configured channel and store them.

        For each (channel, tags) pair in ``TwitterScraper.CHANNELS`` one
        search is issued with the tags joined by spaces. The text of every
        returned status is converted and tokenized; tokens not matching
        ``self.emoticon_regexp`` are lower-cased. The token list is stored
        JSON-encoded, keyed by the status' ``created_at`` value.

        ``start``, ``end``, ``interval`` and ``log`` are accepted but not
        used by this implementation.
        """
        credentials = self.get_credentials(database)
        auth = twitter.OAuth(credentials['access_key'],
                             credentials['access_secret'],
                             credentials['consumer_key'],
                             credentials['consumer_secret'])
        server = twitter.Twitter(auth=auth)

        def normalize(token):
            # Tokens matching the emoticon pattern keep their case.
            if self.emoticon_regexp.search(token):
                return token
            return token.lower()

        for channel, tags in TwitterScraper.CHANNELS.items():
            result = server.search.tweets(q=' '.join(tags), count=100)

            collected = []
            for status in result['statuses']:
                tokens = self.tokenize(self.to_string(status['text']))
                tokens = [normalize(token) for token in tokens]
                collected.append(
                    Entry(timestamp=Timestamp(status['created_at']),
                          value=json.dumps(tokens)))

            database.add(TwitterScraper.ID + '::' + channel, collected)
예제 #7
0
    def test_database_read_write(self):
        """Write entries to two channels and read channels and entries back.

        Expects at least three channels to be listed, among them 'Test::ETH'
        and 'Test::Twitter::ETH', and expects each written channel to return
        as many entries as were added to it.
        """
        database = Database(':memory:')

        def build(samples):
            # Turn (timestamp text, value) pairs into Entry objects.
            return [
                Entry(timestamp=Timestamp(when), value=value)
                for when, value in samples
            ]

        database.add('Test::ETH', build([
            ('2017-04-21 12:00', 234.32),
            ('2017-04-21 14:00', 240.00),
            ('2017-04-21 16:00', 272.98),
        ]))

        database.add('Test::BTC', build([
            ('2017-04-22 13:00', 230.00),
            ('2017-04-22 15:00', 242.00),
            ('2017-04-22 17:00', 270.98),
            ('2017-04-22 19:00', 272.78),
        ]))

        channels = database.get_all_channels()
        self.assertTrue(len(channels) >= 3)

        channel_ids = [channel.id for channel in channels]
        self.assertTrue('Test::ETH' in channel_ids)
        self.assertTrue('Test::Twitter::ETH' in channel_ids)

        for channel_id, expected_count in (('Test::ETH', 3),
                                           ('Test::BTC', 4)):
            self.assertEqual(len(database.get(channel_id)), expected_count)
예제 #8
0
                        default=None,
                        help='Passwort for database encryption')
    parser.add_argument('database',
                        type=str,
                        default=None,
                        help='Database file')

    args = parser.parse_args()

    database = Database(args.database, args.password)

    #
    # Fetch all entries from database and compute earliest entry. The latest entry is
    # always expected to be at the current date.
    #
    minimum_timestamp = Timestamp(Configuration.DATABASE_START_DATE)
    maximum_timestamp = Timestamp.now()

    diff = maximum_timestamp - minimum_timestamp
    number_of_steps = 0
    step = None

    if Configuration.DATABASE_SAMPLING_INTERVAL is Interval.day:
        number_of_steps = abs(diff.days)
        step = timedelta(days=1)
    elif Configuration.DATABASE_SAMPLING_INTERVAL is Interval.hour:
        number_of_steps = int(
            math.floor(abs(diff.days) * 24 + abs(diff.seconds) / 60 / 60))
        step = timedelta(hours=1)
    elif Configuration.DATABASE_SAMPLING_INTERVAL is Interval.minute:
        number_of_steps = int(
예제 #9
0
    def run(self,
            database,
            start=None,
            end=None,
            log=None):
        """Run every registered scraper for the part of an interval lacking data.

        For each scraper returned by ``ScraperRegistry.get_all()`` the
        timestamps already stored for *all* of its channels are collected.
        The requested interval is then shrunk from both ends while its
        boundary is already covered, and the scraper is run on what remains.

        Args:
            database: Database that is queried via ``get`` and handed on to
                the scrapers.
            start: First point in time of interest (``Timestamp``). Defaults
                to ``Timestamp(Configuration.DATABASE_START_DATE)``.
            end: Last point in time of interest (``Timestamp``). Defaults to
                ``Timestamp()``, created at call time.
            log: Optional callable taking one text argument; receives
                progress messages.

        Raises:
            AssertionError: If ``start`` or ``end`` is not a ``Timestamp`` or
                if both are equal.
        """
        import copy

        # Resolve the defaults per call. Default values in the signature are
        # evaluated only once, which would freeze 'now' at definition time
        # and share one mutable object between all calls.
        if start is None:
            start = Timestamp(Configuration.DATABASE_START_DATE)
        if end is None:
            end = Timestamp()

        assert isinstance(start, Timestamp)
        assert isinstance(end, Timestamp)
        assert start != end

        def add_to_log(text):
            if log is not None:
                log(text)

        add_to_log('Starting database acquistion')

        for source in ScraperRegistry.get_all():
            #
            # Query database for all points in time this scraper (or any other filling the
            # same database slots) already got data for. Afterwards, the set of timestamps
            # will contain entries for all points in time where the scraper provided
            # complete data. If any id has missing content, we assume to be a data hole there
            # because the scraper might only be able to retrieve the data in a block for all
            # ids.
            #
            timestamps = None

            add_to_log('  Processing scraper \'{id}\''.format(id=source.id))

            for channel in source.get_channels():
                channel_timestamps = {
                    entry.timestamp for entry in database.get(channel.id)
                }

                if timestamps is None:
                    timestamps = channel_timestamps
                else:
                    timestamps &= channel_timestamps

            # A scraper without channels has no stored data at all; use an
            # empty set so the membership tests below do not fail on None.
            if timestamps is None:
                timestamps = set()

            #
            # Compute interval (first missing and last missing entry) which is still
            # in need of data. advance() changes a Timestamp in place, so work on
            # copies: otherwise the caller's arguments would be modified and every
            # following scraper would start from the already shrunk interval.
            #
            source_start = copy.deepcopy(start)
            source_end = copy.deepcopy(end)

            while source_start < source_end and source_start in timestamps:
                source_start.advance(
                    step=+Configuration.DATABASE_SAMPLING_STEP)

            while source_end > source_start and source_end in timestamps:
                source_end.advance(step=-Configuration.DATABASE_SAMPLING_STEP)

            add_to_log(
                '    Scraping in time interval \'{start}\' to \'{end}\''.
                format(start=source_start, end=source_end))

            # Skip the scraper only if the interval collapsed onto a single
            # point in time that is already covered.
            if source_start != source_end or source_start not in timestamps:
                source.run(
                    database, source_start, source_end,
                    Configuration.DATABASE_SAMPLING_INTERVAL,
                    lambda text: add_to_log('    {0}: {1}'.format(
                        source.id, text)))
예제 #10
0
    def test_gap_detection(self):
        """Check which interval the acquirer hands to the scraper.

        A channel is pre-filled with hourly entries from 14:00 to 17:00.
        The acquirer is then run for several requested intervals and, after
        each run, ``scr.refresh`` is compared with the expected pair.

        NOTE(review): ``scr.refresh`` presumably records the (start, end)
        pair TestScraper was asked to scrape -- confirm in TestScraper.
        """
        database = Database(':memory:')
        database.add('Test::TST', [
            Entry(Timestamp('2017-08-12 14:00'), 10.0),
            Entry(Timestamp('2017-08-12 15:00'), 12.0),
            Entry(Timestamp('2017-08-12 16:00'), 14.0),
            Entry(Timestamp('2017-08-12 17:00'), 14.0),
        ])

        scr = TestScraper()
        ScraperRegistry.scrapers = {}
        ScraperRegistry.register(scr)

        acquirer = Acquirer()

        def as_timestamp(text):
            # None stands for 'scraper was not asked to refresh anything'.
            return None if text is None else Timestamp(text)

        # (requested start, requested end, expected refresh start, expected refresh end)
        cases = (
            ('2017-08-12 12:00', '2017-08-12 16:00',
             '2017-08-12 12:00', '2017-08-12 13:00'),
            ('2017-08-12 12:00', '2017-08-12 17:00',
             '2017-08-12 12:00', '2017-08-12 13:00'),
            ('2017-08-12 14:00', '2017-08-12 19:00',
             '2017-08-12 18:00', '2017-08-12 19:00'),
            ('2017-08-12 14:00', '2017-08-12 17:00',
             None, None),
        )

        for run_start, run_end, refresh_start, refresh_end in cases:
            scr.refresh = (None, None)
            acquirer.run(database, Timestamp(run_start), Timestamp(run_end))
            self.assertEqual(scr.refresh[0], as_timestamp(refresh_start))
            self.assertEqual(scr.refresh[1], as_timestamp(refresh_end))
예제 #11
0
    def test_timestamp_advance(self):
        """Check Timestamp.advance() with hour/day offsets and timedelta steps.

        Covers positive and negative offsets, crossing midnight in both
        directions, combined day and hour offsets, and repeated in-place
        stepping of a single Timestamp via ``step=timedelta(...)``.
        """
        # (initial text, keyword arguments for advance(), expected text)
        offset_cases = (
            ('2017-04-21 14h', {'hours': +2}, '2017-04-21 16h'),
            ('2017-04-21 14:00', {'hours': -3}, '2017-04-21 11:00'),
            ('2017-02-17 01:00', {'hours': -2}, '2017-02-16 23:00'),
            ('2017-02-17 23:00', {'hours': +2}, '2017-02-18 01:00'),
            ('2017-02-17 23:00', {'days': +3, 'hours': +2}, '2017-02-21 01:00'),
            ('2017-02-17 23:00', {'days': -5, 'hours': +2}, '2017-02-13 01:00'),
        )
        for initial, offset, expected in offset_cases:
            moved = Timestamp(initial)
            moved.advance(**offset)
            self.assertEqual(moved, Timestamp(expected))

        # The same object is stepped repeatedly: forward over midnight and
        # back again.
        walker = Timestamp('2017-02-19 22:00')
        step_cases = (
            (+1, '2017-02-19 23:00'),
            (+1, '2017-02-20 00:00'),
            (+1, '2017-02-20 01:00'),
            (-1, '2017-02-20 00:00'),
            (-1, '2017-02-19 23:00'),
        )
        for hours, expected in step_cases:
            walker.advance(step=timedelta(hours=hours))
            self.assertEqual(walker, Timestamp(expected))
예제 #12
0
    def run(self, database, start, end, interval, log):
        """Scrape historical prices for all channels and store them.

        For every channel the price history is requested repeatedly, walking
        backwards in time from ``end`` until ``start`` is passed or a request
        yields no record older than the current 'to' timestamp. The value
        stored per record is the mean of its 'high' and 'low' fields.

        Args:
            database: Target database; receives one ``add`` call per channel.
            start: Earliest point in time to scrape (``Timestamp``).
            end: Latest point in time to scrape (``Timestamp``). It is not
                modified by this method.
            interval: Sampling ``Interval`` passed on to the REST client.
            log: Optional callable taking one text argument.

        Raises:
            AssertionError: If an argument has an unexpected type.

        An ``api.cryptocompare.HTTPError`` is logged and not re-raised; the
        entries collected for the affected channel are then not stored.
        """
        import copy

        assert isinstance(start, Timestamp)
        assert isinstance(end, Timestamp)
        assert isinstance(interval, Interval)

        def add_to_log(message):
            if log is not None:
                log(message)

        client = api.cryptocompare.CryptoCompare()

        # Records older than this are never stored.
        earliest_stored = Timestamp(Configuration.DATABASE_START_DATE)

        #
        # Iterate over each channel
        #
        for channel in self.get_channels():
            add_to_log('Scraping information for {channel}'.format(
                channel=channel.id))

            #
            # We are scraping backwards in time because the CryptoCompare REST API will only
            # support a 'to timestamp' parameter.
            #
            try:
                token = self.split_channel_id(channel.id).token
                to = end

                entries = []

                ok = True
                while ok and to >= start:

                    add_to_log(
                        'Fetching information for {token} until {to}'.format(
                            token=token, to=to))

                    prices = client.get_historical_prices(id=token,
                                                          to=to,
                                                          interval=interval)
                    ok = False

                    for record in prices:
                        price_time = Timestamp(record['time'])
                        price = (record['high'] + record['low']) / 2

                        #
                        # The REST API returns '0' for times where no information is available instead of
                        # raising an exception.
                        #
                        if price_time >= earliest_stored and price > 0:
                            entries.append(
                                Entry(timestamp=price_time, value=price))

                        # Continue only if this batch reached further back
                        # in time than the previous request.
                        if price_time < to:
                            to = price_time
                            ok = True

                    # 'to' refers either to the caller's 'end' or to a
                    # timestamp handed to an Entry above. advance() works in
                    # place, so step on a private copy to leave both intact.
                    to = copy.deepcopy(to)
                    to.advance(step=-Configuration.DATABASE_SAMPLING_STEP)

                database.add(channel.id, entries)

            except api.cryptocompare.HTTPError as e:
                add_to_log('ERROR: {error}'.format(error=e.message))
예제 #13
0
                database.add(channel.id, entries)

            except api.cryptocompare.HTTPError as e:
                add_to_log('ERROR: {error}'.format(error=e.message))


#--------------------------------------------------------------------------
# MAIN
#
if __name__ == '__main__':

    database = Database(':memory:')

    scraper = ScraperRegistry.get(CryptoCompareScraper.ID)
    scraper.run(database=database,
                start=Timestamp(Configuration.DATABASE_START_DATE),
                end=Timestamp.now(),
                interval=Interval.day,
                log=lambda text: print(text))

    frame = pd.DataFrame(
        columns=['id', 'description', 'start', 'end', 'entries'])

    for channel in database.get_all_channels():
        entries = database.get(channel.id)
        timestamps = [entry.timestamp for entry in entries]

        frame.loc[len(frame)] = [
            channel.id, channel.description,
            min(timestamps) if timestamps else '-',
            max(timestamps) if timestamps else '-',