def test_get_backup_resource():
    """backup_resources() raises before a project is set and returns the
    single LocalDisk resource once one is."""
    ProjectManager.set_path(None)
    with pytest.raises(NotInitilizedError):
        ResourceManager.backup_resources()
    ProjectManager.set_path('tests/test-project')
    # Hoisted: the original called backup_resources() three separate times.
    resources = ResourceManager.backup_resources()
    assert len(resources) == 1
    assert isinstance(resources[0], LocalDisk)
def test_do_backup(data):
    """Backing up each message individually writes one JSON file per message
    into the local backup directory."""
    ProjectManager.set_path('tests/test-project')
    for message in data:
        ResourceManager._do_backup(message)
    local_backup_path = ResourceManager.backup_resources()[0].path
    try:
        # Single listing instead of the original's three listdir calls.
        backed_up = os.listdir(local_backup_path)
        assert len(backed_up) == 8
        assert '2018-08-21 06:21:35.json' in backed_up
    finally:
        # Always clean up, even on assertion failure, so stale backup files
        # cannot poison subsequent tests.
        for f in os.listdir(local_backup_path):
            os.remove(os.path.join(local_backup_path, f))
def safe_init(project_path):
    """Confirm with the user before overwriting an existing project path.

    'Y' initializes the project at *project_path*, 'n' exits the process,
    and any other answer re-prompts.
    """
    # Loop instead of recursing (as the original did) so repeated invalid
    # input cannot exhaust the recursion limit.
    while True:
        overwrite = input(
            'WARNING: Path specified already exists. Any configuration will be overwritten. Proceed?[Y/n]'
        )
        if overwrite == 'Y':
            ProjectManager.set_path(project_path)
            ProjectManager.init_project()
            return
        elif overwrite == 'n':
            sys.exit()
def test_do_backup(data):
    """Chunked backup via DataManager leaves one JSON file per chunk."""
    ProjectManager.set_path('tests/test-project')
    ResourceManager._do_backup(data[0:5])
    manager = DataManager
    manager.do_backup(chunk=5, stream_from=data)
    backup_dir = 'tests/test-project/kafka/backups'
    json_backups = [name for name in os.listdir(backup_dir)
                    if name.endswith('json')]
    assert len(json_backups) == 5
def test_get_latest_backup(data):
    """get_latest_backups() returns a dict keyed by resource class name."""
    ProjectManager.set_path('tests/test-project')
    for message in data:
        ResourceManager._do_backup(message)
    local_backup_path = ResourceManager.backup_resources()[0].path
    try:
        # Hoisted: one call instead of the original's two.
        latest = ResourceManager.get_latest_backups()
        assert isinstance(latest, dict)
        # Membership test directly on the dict; .keys() was redundant.
        assert 'LocalDisk' in latest
    finally:
        # Clean up backups even when an assertion fails.
        for f in os.listdir(local_backup_path):
            os.remove(os.path.join(local_backup_path, f))
def test_project_manager():
    """set_path updates KRYPTOFLOW_DIR; create_subdirs/create_template lay
    out the expected directory tree and template files."""
    ProjectManager.set_path('tests/test-project')
    assert ProjectManager.KRYPTOFLOW_DIR == 'tests/test-project'
    ProjectManager.set_path('tests/test-project-2')
    try:
        ProjectManager.create_subdirs()
        assert os.path.isdir('tests/test-project-2/kafka/logs')
    finally:
        # Remove the scratch project even if the assertion fails.
        shutil.rmtree('tests/test-project-2')
    ProjectManager.set_path('tests/test-project-2')
    try:
        ProjectManager.create_template()
        for template_file in TEMPLATE_PROJECT_FILES:
            file_path = template_file.replace('template/', '')
            assert os.path.isfile(
                os.path.join(ProjectManager.KRYPTOFLOW_DIR, file_path))
    finally:
        shutil.rmtree('tests/test-project-2')
def init(name, path):
    """Create (or safely re-initialize) a project named *name* under *path*."""
    project_path = os.path.join(path, name)
    if not os.path.isdir(project_path):
        ProjectManager.create_dir(project_path)
        ProjectManager.set_path(project_path)
        ProjectManager.init_project()
    else:
        # The directory already exists: ask the user before overwriting.
        safe_init(project_path)
def scrape(monitor, source):
    """Launch the supervisord-managed scrapers for the current project.

    Parameters
    ----------
    monitor: bool
        Start supervisord monitoring server
    source: str
        'all', 'gdax', 'reddit', 'twitter'

    Returns
    -------

    """
    ProjectManager.set_path('.')
    # Resolve the supervisord config file inside the project directory.
    config_path = os.path.join(ProjectManager.KRYPTOFLOW_DIR,
                               ProjectManager.get_value('supervisor'))
    main(['-c', config_path])
    if monitor:
        print('monitoring')
'side': 'buy' }, { 'price': 6379.99, 'ts': '2018-08-21 06:21:57', 'volume_24h': 8347.08598465, 'side': 'sell' }, { 'price': 6380.0, 'ts': '2018-08-21 06:22:00', 'volume_24h': 8347.21550695, 'side': 'buy' }, { 'price': 6380.28, 'ts': '2018-08-21 06:22:00', 'volume_24h': 8347.21668217, 'side': 'buy' }, { 'price': 6380.46, 'ts': '2018-08-21 06:22:04', 'volume_24h': 8347.06809264, 'side': 'buy' }] from kryptoflow.managers.project import ProjectManager ProjectManager.set_path('tests/test-project') # print(ResourceManager.get_latest_backed_up_time()) ResourceManager._do_backup(test_data[0:5]) dm = DataManager dm.do_backup(chunk=2, stream_from=test_data) print(os.listdir('tests/test-project/kafka/backups'))
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.reddit import RedditStreamer
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    # Wire an avro producer for the 'reddit' topic into the Reddit streamer.
    kafka_schemas = ProjectManager.get_value('kafka')['schemas']
    reddit_secrets = ProjectManager.get_secrets('reddit')
    producer = KafkaStream.avro_producer(schemas=kafka_schemas, topic='reddit')
    streamer = RedditStreamer(producer=producer, reddit_config=reddit_secrets)
    streamer.start_stream()
def project_manager():
    """Fixture: a ProjectManager pointed at the test project, config loaded."""
    manager = ProjectManager()
    manager.set_path('tests/test-project')
    manager.set_config(init=False)
    return manager
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.gdax_ws import GDAXClient
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    # Stream GDAX BTC-USD ticker events into the local 'gdax' Kafka topic.
    kafka_schemas = ProjectManager.get_value('kafka')['schemas']
    producer = KafkaStream.avro_producer(topic='gdax',
                                         ip='localhost',
                                         schemas=kafka_schemas)
    client = GDAXClient(products=['BTC-USD'],
                        channels=['ticker'],
                        producer=producer)
    client.start_stream()
def test_get_secrets():
    """Reddit secrets load as a dict containing the expected credential key."""
    ProjectManager.set_path('tests/test-project')
    # Hoisted: the original called get_secrets twice for one logical check.
    secrets = ProjectManager.get_secrets('reddit')
    assert isinstance(secrets, dict)
    # Membership test directly on the dict; .keys() was redundant.
    assert 'client_id' in secrets
def test_project():
    """get_models() must raise when no project path has been set."""
    ProjectManager.set_path(None)
    with pytest.raises(NotInitilizedError):
        ProjectManager.get_models()
                               Key=format_backup('backups', stream[0]))
        # NOTE(review): the line above is the tail of a put-style call whose
        # start is outside this view — formatting reconstructed, confirm.
        _logger.info(f'Backup saved to S3 for data starting at: {stream[0]["ts"]}')

    def get_last(self):
        # Pick the most recent backup object in the bucket, ordered by the
        # datetime encoded in each object's filename.
        objects = sorted(self.client.list_objects(Bucket=self.bucket)['Contents'],
                         key=lambda x: fname_to_datetime(fname_from_object(x)),
                         reverse=True)[0]
        # NOTE(review): `objects` is a single listing entry (a dict);
        # dict.get() with no arguments raises TypeError. This probably needs a
        # get_object call to fetch the body — verify against the client API.
        event = json.loads(objects.get()['Body'].read().decode('utf-8'))[-1]
        # NOTE(review): [-1] is applied twice (above and here) — confirm which
        # element is actually intended.
        return datetime.strptime(event[-1]['ts'], '%Y-%m-%d %H:%M:%S')


def fname_from_object(object_path):
    # Bare filename extracted from a listing entry's 'Key'.
    return os.path.basename(object_path['Key'])


def format_backup(path, message):
    # Backup key layout: <path>/<message timestamp>.json
    return os.path.join(path, message['ts'] + '.json')


def fname_to_datetime(fname):
    # Parse '<YYYY-mm-dd HH:MM:SS>.json' back into a datetime.
    return datetime.strptime(fname.split('.')[0], '%Y-%m-%d %H:%M:%S')


if __name__ == '__main__':
    ProjectManager.set_path('kryptoflow/template')
    rm = ResourceManager.backup_resources()
    # ResourceManager.do_backup(chunk)
    print(ResourceManager.get_latest_backups())
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.twitter import TwitterStream
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    schemas = ProjectManager.get_value('kafka')['schemas']
    secrets = ProjectManager.get_secrets('twitter')
    # BUG FIX: this runner streams tweets, but the producer published to the
    # 'reddit' topic (copy-paste from the reddit runner) — use 'twitter'.
    sink = KafkaStream.avro_producer(schemas=schemas, topic='twitter')
    tweet_stream = TwitterStream(producer=sink, twitter_config=secrets)
    tweet_stream.start()