def test_database_overwrite(self):
    #
    # Create database
    #
    database = Database(':memory:')

    #
    # Setup some coin entries. The second entry for 15:00 must overwrite
    # the first one.
    #
    entries = []
    entries.append(Entry(timestamp=Timestamp('2017-06-18 12:00'), value=230.0))
    entries.append(Entry(timestamp=Timestamp('2017-06-18 15:00'), value=2200.12))
    entries.append(Entry(timestamp=Timestamp('2017-06-18 21:00'), value=240.0))
    entries.append(Entry(timestamp=Timestamp('2017-06-18 15:00'), value=242.0))

    database.add('Test::ETH', entries)

    entries = database.get('Test::ETH')
    self.assertEqual(len(entries), 3)

    for entry in entries:
        if entry.timestamp == Timestamp('2017-06-18 15:00'):
            self.assertEqual(entry.value, 242.0)
def test_historical_prices():
    client = CryptoCompare()
    prices = client.get_historical_prices('ETH', Timestamp('2016-04-08 06:00'), Interval.hour)

    print(len(prices))

    if prices:
        # Print first and last returned point in time
        print(Timestamp(prices[0]['time']))
        print(Timestamp(prices[-1]['time']))
def test_timestamp_create(self):
    #
    # Timestamp for 'now' must always be in UTC time
    #
    s1 = Timestamp()
    s2 = Timestamp(datetime.utcnow())
    s3 = Timestamp(datetime.utcnow().timestamp())

    self.assertEqual(s1, s2)
    self.assertEqual(s1, s3)

    s1 = Timestamp('2017-04-21 14:00')
    s2 = Timestamp('2017-04-21 14:30')
    s3 = Timestamp('2017-04-21 14:59')
    s4 = Timestamp('2017-04-21 15:00')
    s5 = Timestamp('2017-04-22 14:30')

    #
    # Parsed dates must not be changed to other time zones
    #
    self.assertEqual(s1.timestamp.hour, 14)
    self.assertEqual(s2.timestamp.hour, 14)
    self.assertEqual(s3.timestamp.hour, 14)
    self.assertEqual(s4.timestamp.hour, 15)
    self.assertEqual(s5.timestamp.hour, 14)

    #
    # Timestamps are sampled at hour resolution, so minutes are truncated
    # when comparing
    #
    self.assertEqual(s1, s2)
    self.assertEqual(s1, s3)
    self.assertNotEqual(s1, s4)
    self.assertNotEqual(s1, s5)
def test_error():
    client = CryptoCompare()

    try:
        client.get_historical_prices('XYZ', Timestamp('2016-04-08 06:00'), Interval.hour)
    except HTTPError as e:
        print('ERROR:', e.message)
def get(self, id):
    assert id != Database.CHANNELS_ID
    assert id != Database.CREDENTIALS_ID

    channel = self.get_channel(id)
    assert channel

    # Table names cannot be bound as SQL parameters, so the channel id is
    # interpolated into the statement directly
    command = 'SELECT * FROM "{channel}"'.format(channel=id)
    rows = self.cursor.execute(command)

    return [Entry(timestamp=Timestamp(row[0]), value=row[1]) for row in rows]
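#
# Since the channel id ends up in the SQL string unescaped, a validation
# helper could guard against malformed ids. This is an illustrative sketch
# only, not part of the original Database class; the id format
# ('Scraper::Token') is inferred from the tests in this module.
#
import re

def is_wellformed_channel_id(id):
    return re.match(r'^[A-Za-z0-9_]+(::[A-Za-z0-9_]+)+$', id) is not None

assert is_wellformed_channel_id('Test::ETH')
assert not is_wellformed_channel_id('Test"; DROP TABLE data; --')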
def run(self, database, start, end, interval, log):
    credentials = self.get_credentials(database)

    server = twitter.Twitter(auth=twitter.OAuth(credentials['access_key'],
                                                credentials['access_secret'],
                                                credentials['consumer_key'],
                                                credentials['consumer_secret']))

    for channel, tags in TwitterScraper.CHANNELS.items():
        query = server.search.tweets(q=' '.join(tags), count=100)

        entries = []

        for q in query['statuses']:
            tweet = self.to_string(q['text'])
            tweet = self.tokenize(tweet)

            # Keep emoticon tokens as they are, lowercase everything else
            tweet = [token if self.emoticon_regexp.search(token) else token.lower()
                     for token in tweet]

            entries.append(Entry(timestamp=Timestamp(q['created_at']),
                                 value=json.dumps(tweet)))

        database.add(TwitterScraper.ID + '::' + channel, entries)
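#
# The emoticon pattern itself is defined elsewhere on TwitterScraper. A
# minimal sketch of what such a pattern might look like (hypothetical,
# matching common western emoticons such as ':)', ':-D' or ';P'):
#
import re

emoticon_regexp = re.compile(r"""
    ^
    [:;=8]                         # eyes
    [\-o\*\']?                     # optional nose
    [\)\]\(\[dDpP/\:\}\{@\|\\]     # mouth
    $
    """, re.VERBOSE)

assert emoticon_regexp.search(':-D')
assert not emoticon_regexp.search('hello')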
def test_database_read_write(self):
    #
    # Create database
    #
    database = Database(':memory:')

    #
    # Add some entries
    #
    eth_entries = []
    eth_entries.append(Entry(timestamp=Timestamp('2017-04-21 12:00'), value=234.32))
    eth_entries.append(Entry(timestamp=Timestamp('2017-04-21 14:00'), value=240.00))
    eth_entries.append(Entry(timestamp=Timestamp('2017-04-21 16:00'), value=272.98))

    database.add('Test::ETH', eth_entries)

    btc_entries = []
    btc_entries.append(Entry(timestamp=Timestamp('2017-04-22 13:00'), value=230.00))
    btc_entries.append(Entry(timestamp=Timestamp('2017-04-22 15:00'), value=242.00))
    btc_entries.append(Entry(timestamp=Timestamp('2017-04-22 17:00'), value=270.98))
    btc_entries.append(Entry(timestamp=Timestamp('2017-04-22 19:00'), value=272.78))

    database.add('Test::BTC', btc_entries)

    channels = database.get_all_channels()
    self.assertTrue(len(channels) >= 3)
    self.assertTrue('Test::ETH' in [channel.id for channel in channels])
    self.assertTrue('Test::Twitter::ETH' in [channel.id for channel in channels])

    entries = database.get('Test::ETH')
    self.assertEqual(len(entries), 3)

    entries = database.get('Test::BTC')
    self.assertEqual(len(entries), 4)
                    default=None,
                    help='Password for database encryption')
parser.add_argument('database', type=str, default=None,
                    help='Database file')

args = parser.parse_args()

database = Database(args.database, args.password)

#
# Fetch all entries from database and compute earliest entry. The latest entry is
# always expected to be at the current date.
#
minimum_timestamp = Timestamp(Configuration.DATABASE_START_DATE)
maximum_timestamp = Timestamp.now()

diff = maximum_timestamp - minimum_timestamp

number_of_steps = 0
step = None

if Configuration.DATABASE_SAMPLING_INTERVAL is Interval.day:
    number_of_steps = abs(diff.days)
    step = timedelta(days=1)
elif Configuration.DATABASE_SAMPLING_INTERVAL is Interval.hour:
    number_of_steps = int(math.floor(abs(diff.days) * 24 + abs(diff.seconds) / 60 / 60))
    step = timedelta(hours=1)
elif Configuration.DATABASE_SAMPLING_INTERVAL is Interval.minute:
    # Completed by analogy with the day/hour branches above
    number_of_steps = int(math.floor(abs(diff.days) * 24 * 60 + abs(diff.seconds) / 60))
    step = timedelta(minutes=1)
def run(self, database, start=None, end=None, log=None):
    #
    # Default arguments are evaluated only once in Python, so the defaults
    # (database start date and 'now') must be resolved at call time rather
    # than in the signature
    #
    if start is None:
        start = Timestamp(Configuration.DATABASE_START_DATE)
    if end is None:
        end = Timestamp()

    assert isinstance(start, Timestamp)
    assert isinstance(end, Timestamp)
    assert start != end

    def add_to_log(text):
        if log is not None:
            log(text)

    add_to_log('Starting database acquisition')

    for source in ScraperRegistry.get_all():
        #
        # Query database for all points in time this scraper (or any other filling the
        # same database slots) already got data for. Afterwards, the set of timestamps
        # will contain entries for all points in time where the scraper provided
        # complete data. If any id has missing content, we assume there is a data hole
        # because the scraper might only be able to retrieve the data in one block for
        # all ids.
        #
        timestamps = None

        add_to_log(' Processing scraper \'{id}\''.format(id=source.id))

        for channel in source.get_channels():
            entries = database.get(channel.id)

            if timestamps is None:
                timestamps = set([entry.timestamp for entry in entries])
            else:
                timestamps &= set([entry.timestamp for entry in entries])

        # A scraper without channels has no known timestamps at all
        if timestamps is None:
            timestamps = set()

        #
        # Compute interval (first missing and last missing entry) which is still
        # in need of data. The timestamps are copied because 'advance()' modifies
        # them in place and must not touch the caller's 'start' / 'end' objects.
        #
        source_start = Timestamp(start.timestamp)
        source_end = Timestamp(end.timestamp)

        while source_start < source_end and source_start in timestamps:
            source_start.advance(step=+Configuration.DATABASE_SAMPLING_STEP)

        while source_end > source_start and source_end in timestamps:
            source_end.advance(step=-Configuration.DATABASE_SAMPLING_STEP)

        add_to_log(' Scraping in time interval \'{start}\' to \'{end}\''
                   .format(start=source_start, end=source_end))

        if source_start != source_end or source_start not in timestamps:
            source.run(database, source_start, source_end,
                       Configuration.DATABASE_SAMPLING_INTERVAL,
                       lambda text: add_to_log(' {0}: {1}'.format(source.id, text)))
def test_gap_detection(self):
    database = Database(':memory:')

    entries = []
    entries.append(Entry(Timestamp('2017-08-12 14:00'), 10.0))
    entries.append(Entry(Timestamp('2017-08-12 15:00'), 12.0))
    entries.append(Entry(Timestamp('2017-08-12 16:00'), 14.0))
    entries.append(Entry(Timestamp('2017-08-12 17:00'), 14.0))

    database.add('Test::TST', entries)

    scr = TestScraper()

    ScraperRegistry.scrapers = {}
    ScraperRegistry.register(scr)

    acquirer = Acquirer()

    # Gap at the beginning of the requested interval
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 12:00'), Timestamp('2017-08-12 16:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 12:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 13:00'))

    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 12:00'), Timestamp('2017-08-12 17:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 12:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 13:00'))

    # Gap at the end of the requested interval
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 14:00'), Timestamp('2017-08-12 19:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 18:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 19:00'))

    # No gap at all, so the scraper must not be triggered
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 14:00'), Timestamp('2017-08-12 17:00'))
    self.assertEqual(scr.refresh[0], None)
    self.assertEqual(scr.refresh[1], None)
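#
# TestScraper itself is defined elsewhere in the test module. A minimal
# sketch of what the test above assumes (hypothetical; the Acquirer only
# needs 'id', 'get_channels()' and 'run()'): it serves the 'Test::TST'
# channel and records the interval it was asked to refresh.
#
from collections import namedtuple

# Stand-in for the real channel class, which exposes '.id' and '.description'
Channel = namedtuple('Channel', ['id', 'description'])

class TestScraper:
    def __init__(self):
        self.id = 'Test'
        self.refresh = (None, None)

    def get_channels(self):
        return [Channel(id='Test::TST', description='Test channel')]

    def run(self, database, start, end, interval, log):
        self.refresh = (start, end)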
def test_timestamp_advance(self):
    s = Timestamp('2017-04-21 14h')
    s.advance(hours=+2)
    self.assertEqual(s, Timestamp('2017-04-21 16h'))

    s = Timestamp('2017-04-21 14:00')
    s.advance(hours=-3)
    self.assertEqual(s, Timestamp('2017-04-21 11:00'))

    # Advancing must carry over day boundaries
    s = Timestamp('2017-02-17 01:00')
    s.advance(hours=-2)
    self.assertEqual(s, Timestamp('2017-02-16 23:00'))

    s = Timestamp('2017-02-17 23:00')
    s.advance(hours=+2)
    self.assertEqual(s, Timestamp('2017-02-18 01:00'))

    s = Timestamp('2017-02-17 23:00')
    s.advance(days=+3, hours=+2)
    self.assertEqual(s, Timestamp('2017-02-21 01:00'))

    s = Timestamp('2017-02-17 23:00')
    s.advance(days=-5, hours=+2)
    self.assertEqual(s, Timestamp('2017-02-13 01:00'))

    # Advancing by an explicit timedelta step
    s = Timestamp('2017-02-19 22:00')
    s.advance(step=timedelta(hours=+1))
    self.assertEqual(s, Timestamp('2017-02-19 23:00'))

    s.advance(step=timedelta(hours=+1))
    self.assertEqual(s, Timestamp('2017-02-20 00:00'))

    s.advance(step=timedelta(hours=+1))
    self.assertEqual(s, Timestamp('2017-02-20 01:00'))

    s.advance(step=timedelta(hours=-1))
    self.assertEqual(s, Timestamp('2017-02-20 00:00'))

    s.advance(step=timedelta(hours=-1))
    self.assertEqual(s, Timestamp('2017-02-19 23:00'))
def run(self, database, start, end, interval, log):
    assert isinstance(start, Timestamp)
    assert isinstance(end, Timestamp)
    assert isinstance(interval, Interval)

    def add_to_log(message):
        if log is not None:
            log(message)

    client = api.cryptocompare.CryptoCompare()

    #
    # Iterate over each channel
    #
    for channel in self.get_channels():
        add_to_log('Scraping information for {channel}'.format(channel=channel.id))

        #
        # We are scraping backwards in time because the CryptoCompare REST API only
        # supports a 'to timestamp' parameter.
        #
        try:
            # Copy so that 'advance()' below cannot modify the caller's 'end'
            to = Timestamp(end.timestamp)
            entries = []
            ok = True

            while ok and to >= start:
                token = self.split_channel_id(channel.id).token

                add_to_log('Fetching information for {token} until {to}'.format(
                    token=token, to=to))

                prices = client.get_historical_prices(id=token, to=to, interval=interval)

                ok = False

                for price in prices:
                    price_time = Timestamp(price['time'])
                    value = (price['high'] + price['low']) / 2

                    #
                    # The REST API returns '0' for times where no information is
                    # available instead of raising an exception.
                    #
                    if price_time >= Timestamp(Configuration.DATABASE_START_DATE) and value > 0:
                        entries.append(Entry(timestamp=price_time, value=value))

                    if price_time < to:
                        # Copy again: 'to' must not alias an entry's timestamp,
                        # which 'advance()' would otherwise shift backwards
                        to = Timestamp(price_time.timestamp)
                        ok = True

                to.advance(step=-Configuration.DATABASE_SAMPLING_STEP)

            database.add(channel.id, entries)

        except api.cryptocompare.HTTPError as e:
            add_to_log('ERROR: {error}'.format(error=e.message))
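#
# For reference, the channel-id convention assumed above: ids look like
# 'CryptoCompare::ETH' and 'split_channel_id()' extracts the token part.
# This is an illustrative sketch, not the original helper, and the
# 'ChannelId' tuple name is hypothetical.
#
from collections import namedtuple

ChannelId = namedtuple('ChannelId', ['scraper', 'token'])

def split_channel_id(channel_id):
    scraper, _, token = channel_id.partition('::')
    return ChannelId(scraper=scraper, token=token)

assert split_channel_id('CryptoCompare::ETH').token == 'ETH'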
#--------------------------------------------------------------------------
# MAIN
#
if __name__ == '__main__':
    database = Database(':memory:')

    scraper = ScraperRegistry.get(CryptoCompareScraper.ID)
    scraper.run(database=database,
                start=Timestamp(Configuration.DATABASE_START_DATE),
                end=Timestamp.now(),
                interval=Interval.day,
                log=lambda text: print(text))

    frame = pd.DataFrame(columns=['id', 'description', 'start', 'end', 'entries'])

    for channel in database.get_all_channels():
        entries = database.get(channel.id)
        timestamps = [entry.timestamp for entry in entries]

        frame.loc[len(frame)] = [channel.id,
                                 channel.description,
                                 min(timestamps) if timestamps else '-',
                                 max(timestamps) if timestamps else '-',
                                 len(entries)]