def test_database_overwrite (self):
    #
    # Create database
    #
    database = Database (':memory:')

    #
    # Setup some coin entries
    #
    entries = []
    entries.append (Entry (timestamp=Timestamp ('2017-06-18 12:00'), value=230.0))
    entries.append (Entry (timestamp=Timestamp ('2017-06-18 15:00'), value=2200.12))
    entries.append (Entry (timestamp=Timestamp ('2017-06-18 21:00'), value=240.0))
    entries.append (Entry (timestamp=Timestamp ('2017-06-18 15:00'), value=242.0))

    database.add ('Test::ETH', entries)

    #
    # The duplicate timestamp ('2017-06-18 15:00') must overwrite the earlier
    # entry, leaving three rows with the later value winning
    #
    entries = database.get ('Test::ETH')
    self.assertEqual (len (entries), 3)

    for entry in entries:
        if entry.timestamp == Timestamp ('2017-06-18 15:00'):
            self.assertEqual (entry.value, 242.0)
def test_gap_detection(self):
    database = Database(':memory:')

    #
    # Seed the database with a contiguous block of hourly entries (14:00 - 17:00)
    #
    entries = []
    entries.append(Entry(Timestamp('2017-08-12 14:00'), 10.0))
    entries.append(Entry(Timestamp('2017-08-12 15:00'), 12.0))
    entries.append(Entry(Timestamp('2017-08-12 16:00'), 14.0))
    entries.append(Entry(Timestamp('2017-08-12 17:00'), 14.0))

    database.add('Test::TST', entries)

    #
    # Register a test scraper which records the time range it is asked to refresh
    #
    scr = TestScraper()

    ScraperRegistry.scrapers = {}
    ScraperRegistry.register(scr)

    acquirer = Acquirer()

    #
    # Gap before the stored data: only 12:00 - 13:00 needs to be fetched
    #
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 12:00'), Timestamp('2017-08-12 16:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 12:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 13:00'))

    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 12:00'), Timestamp('2017-08-12 17:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 12:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 13:00'))

    #
    # Gap after the stored data: only 18:00 - 19:00 needs to be fetched
    #
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 14:00'), Timestamp('2017-08-12 19:00'))
    self.assertEqual(scr.refresh[0], Timestamp('2017-08-12 18:00'))
    self.assertEqual(scr.refresh[1], Timestamp('2017-08-12 19:00'))

    #
    # No gap: the scraper must not be asked to refresh anything
    #
    scr.refresh = (None, None)
    acquirer.run(database, Timestamp('2017-08-12 14:00'), Timestamp('2017-08-12 17:00'))
    self.assertEqual(scr.refresh[0], None)
    self.assertEqual(scr.refresh[1], None)
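#
# The gap detection test above relies on a scraper stub whose implementation is
# not shown here. The sketch below is an assumption about its minimal contract,
# inferred from the test: it serves a 'Test' channel and simply records the time
# window the acquirer asks it to refresh in its 'refresh' attribute. The real
# TestScraper (and the channel objects it returns) may well differ.
#
class TestScraperSketch:

    ID = 'Test'

    def __init__(self):
        # Last (start, end) window requested by the acquirer, (None, None) if none
        self.refresh = (None, None)

    def get_channels(self):
        # A single synthetic channel matching the 'Test::TST' data added above;
        # the real interface may return channel objects rather than plain ids
        return ['Test::TST']

    def run(self, database, start, end, interval, log):
        # Record the requested window instead of actually scraping anything
        self.refresh = (start, end)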
def __init__(self, key: str, content: Optional[Union[str, bytes]], statistics: Optional[Statistics] = None):
    #
    # Map the S3 key onto a flat file name below /tmp/s3/, e.g.
    # '/tmp/s3/some/bucket/object' becomes '/tmp/s3/some-bucket-object'
    #
    self.file_name = key.replace("/tmp/s3/", "")
    self.file_name = self.file_name.replace("/tmp/", "")
    self.file_name = self.file_name.replace("/", "-")
    self.file_name = "/tmp/s3/" + self.file_name

    if statistics is None:
        statistics = Statistics()

    Entry.__init__(self, key, None, statistics)

    if content is not None:
        #
        # Write the content to the backing file, in text or binary mode
        # depending on its type
        #
        options = "w+" if isinstance(content, str) else "wb+"

        with open(self.file_name, options) as f:
            f.write(content)

        self.length = len(content)
    else:
        self.length = os.path.getsize(self.file_name)

    self.last_modified = time.time()
def run (self, database, start, end, interval, log):
    credentials = self.get_credentials (database)

    server = twitter.Twitter (auth=twitter.OAuth (credentials['access_key'],
                                                  credentials['access_secret'],
                                                  credentials['consumer_key'],
                                                  credentials['consumer_secret']))

    for channel, tags in TwitterScraper.CHANNELS.items ():
        query = server.search.tweets (q=' '.join (tags), count=100)

        entries = []

        for q in query['statuses']:
            #
            # Normalize the tweet text: tokenize it and lowercase everything
            # except emoticons, which are kept verbatim
            #
            tweet = self.to_string (q['text'])
            tweet = self.tokenize (tweet)
            tweet = [token if self.emoticon_regexp.search (token) else token.lower () for token in tweet]

            entries.append (Entry (timestamp=Timestamp (q['created_at']), value=json.dumps (tweet)))

        database.add (TwitterScraper.ID + '::' + channel, entries)
def test_database_read_write (self):
    #
    # Create database
    #
    database = Database (':memory:')

    #
    # Add some entries
    #
    eth_entries = []
    eth_entries.append (Entry (timestamp=Timestamp ('2017-04-21 12:00'), value=234.32))
    eth_entries.append (Entry (timestamp=Timestamp ('2017-04-21 14:00'), value=240.00))
    eth_entries.append (Entry (timestamp=Timestamp ('2017-04-21 16:00'), value=272.98))

    database.add ('Test::ETH', eth_entries)

    btc_entries = []
    btc_entries.append (Entry (timestamp=Timestamp ('2017-04-22 13:00'), value=230.00))
    btc_entries.append (Entry (timestamp=Timestamp ('2017-04-22 15:00'), value=242.00))
    btc_entries.append (Entry (timestamp=Timestamp ('2017-04-22 17:00'), value=270.98))
    btc_entries.append (Entry (timestamp=Timestamp ('2017-04-22 19:00'), value=272.78))

    database.add ('Test::BTC', btc_entries)

    entries = database.get_all_channels ()
    self.assertTrue (len (entries) >= 3)
    self.assertTrue ('Test::ETH' in [entry.id for entry in entries])
    self.assertTrue ('Test::Twitter::ETH' in [entry.id for entry in entries])

    entries = database.get ('Test::ETH')
    self.assertEqual (len (entries), 3)

    entries = database.get ('Test::BTC')
    self.assertEqual (len (entries), 4)
def run(self, database, start, end, interval, log):
    assert isinstance(start, Timestamp)
    assert isinstance(end, Timestamp)
    assert isinstance(interval, Interval)

    def add_to_log(message):
        if log is not None:
            log(message)

    client = api.cryptocompare.CryptoCompare()

    #
    # Iterate over each channel
    #
    for channel in self.get_channels():
        add_to_log('Scraping information for {channel}'.format(channel=channel.id))

        #
        # We are scraping backwards in time because the CryptoCompare REST API
        # only supports a 'to timestamp' parameter.
        #
        try:
            to = end
            entries = []
            ok = True

            while ok and to >= start:
                token = self.split_channel_id(channel.id).token

                add_to_log('Fetching information for {token} until {to}'.format(token=token, to=to))

                prices = client.get_historical_prices(id=token, to=to, interval=interval)

                ok = False

                for price in prices:
                    price_time = Timestamp(price['time'])
                    price = (price['high'] + price['low']) / 2

                    #
                    # The REST API returns '0' for times where no information is
                    # available instead of raising an exception.
                    #
                    if price_time >= Timestamp(Configuration.DATABASE_START_DATE) and price > 0:
                        entries.append(Entry(timestamp=price_time, value=price))

                        if price_time < to:
                            to = price_time
                            ok = True

                to.advance(step=-Configuration.DATABASE_SAMPLING_STEP)

            database.add(channel.id, entries)

        except api.cryptocompare.HTTPError as e:
            add_to_log('ERROR: {error}'.format(error=e.message))
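#
# Illustrative usage sketch (assumption, not part of the original sources): one
# way the run() method above might be driven for a one-day backfill. The
# concrete scraper class name, the Interval construction and the use of print()
# as the log callback are guesses for the sake of the example only.
#
def backfill_example():
    database = Database(':memory:')
    scraper = CryptoCompareScraper()      # hypothetical concrete scraper class
    scraper.run(database,
                start=Timestamp('2017-06-01 00:00'),
                end=Timestamp('2017-06-02 00:00'),
                interval=Interval('1h'),  # hypothetical interval constructor
                log=print)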
def __init__(self, key: str, resources: Any, statistics: Statistics):
    Entry.__init__(self, key, resources, statistics)