Example #1
0
def test_get_backup_resource():
    """Check that backup resources need a project path and resolve to LocalDisk.

    With the project path unset, ``ResourceManager.backup_resources()`` must
    raise ``NotInitilizedError``. Once the path points at the test project,
    exactly one resource is expected and it must be a ``LocalDisk``.
    """
    # No project path configured: the lookup has to fail.
    ProjectManager.set_path(None)
    with pytest.raises(NotInitilizedError):
        ResourceManager.backup_resources()

    # Test project configured: a single LocalDisk resource is expected.
    ProjectManager.set_path('tests/test-project')
    resources = ResourceManager.backup_resources()
    assert len(resources) == 1
    first_resource = ResourceManager.backup_resources()[0]
    assert isinstance(first_resource, LocalDisk)
Example #2
0
def test_do_backup(data):
    """Back up every message in ``data`` and check the files written locally.

    Parameters
    ----------
    data : iterable
        Messages passed one at a time to ``ResourceManager._do_backup``
        (provided from outside this block, e.g. by a fixture).
    """
    ProjectManager.set_path('tests/test-project')
    for message in data:
        ResourceManager._do_backup(message)
    local_backup_path = ResourceManager.backup_resources()[0].path

    try:
        written = os.listdir(local_backup_path)
        assert len(written) == 8
        assert '2018-08-21 06:21:35.json' in written
    finally:
        # Always empty the backup directory, even when an assertion fails,
        # so leftover files cannot skew the file counts of later tests.
        for f in os.listdir(local_backup_path):
            os.remove(os.path.join(local_backup_path, f))
Example #3
0
def safe_init(project_path):
    """Ask for confirmation before initialising a project in an existing path.

    The prompt is repeated until the user answers exactly ``'Y'`` or ``'n'``.

    Parameters
    ----------
    project_path : str
        Existing directory handed to ``ProjectManager.set_path`` when the
        user answers ``'Y'``.

    Raises
    ------
    SystemExit
        When the user answers ``'n'``.
    """
    prompt = 'WARNING: Path specified already exists. Any configuration will be overwritten. Proceed?[Y/n]'
    # Re-prompt in a loop rather than by recursion, so a long run of invalid
    # answers cannot exhaust the interpreter's recursion limit.
    while True:
        overwrite = input(prompt)
        if overwrite == 'Y':
            ProjectManager.set_path(project_path)
            ProjectManager.init_project()
            return
        if overwrite == 'n':
            sys.exit()
Example #4
0
def test_do_backup(data):
    """Run a direct backup followed by a chunked ``DataManager`` backup.

    Afterwards the backups directory of the test project is expected to hold
    exactly five entries whose names end in ``json``.
    """
    ProjectManager.set_path('tests/test-project')
    ResourceManager._do_backup(data[0:5])
    DataManager.do_backup(chunk=5, stream_from=data)

    # Count only the JSON entries found in the backups directory.
    json_names = []
    for entry in os.listdir('tests/test-project/kafka/backups'):
        if entry.endswith('json'):
            json_names.append(entry)
    assert len(json_names) == 5
Example #5
0
def test_get_latest_backup(data):
    """Back up ``data`` and check the shape of ``get_latest_backups()``.

    Parameters
    ----------
    data : iterable
        Messages passed one at a time to ``ResourceManager._do_backup``
        (provided from outside this block, e.g. by a fixture).
    """
    ProjectManager.set_path('tests/test-project')
    for message in data:
        ResourceManager._do_backup(message)

    try:
        assert isinstance(ResourceManager.get_latest_backups(), dict)
        assert 'LocalDisk' in ResourceManager.get_latest_backups()
    finally:
        # Always empty the backup directory, even when an assertion fails,
        # so leftover files cannot leak into other tests.
        local_backup_path = ResourceManager.backup_resources()[0].path
        for f in os.listdir(local_backup_path):
            os.remove(os.path.join(local_backup_path, f))
Example #6
0
def test_project_manager():
    """Exercise path setting, sub-directory creation and template creation.

    A scratch project directory is created and removed twice: once to check
    ``create_subdirs`` and once to check that ``create_template`` produces
    every file listed in ``TEMPLATE_PROJECT_FILES``.
    """
    scratch_dir = 'tests/test-project-2'

    # Setting the path is reflected in KRYPTOFLOW_DIR.
    ProjectManager.set_path('tests/test-project')
    assert ProjectManager.KRYPTOFLOW_DIR == 'tests/test-project'

    # Sub-directory creation.
    ProjectManager.set_path(scratch_dir)
    ProjectManager.create_subdirs()
    assert os.path.isdir('tests/test-project-2/kafka/logs')
    shutil.rmtree(scratch_dir)

    # Template creation: each template file must exist without its
    # 'template/' prefix under the project directory.
    ProjectManager.set_path(scratch_dir)
    ProjectManager.create_template()
    for template_file in TEMPLATE_PROJECT_FILES:
        relative_path = template_file.replace('template/', '')
        expected = os.path.join(ProjectManager.KRYPTOFLOW_DIR, relative_path)
        assert os.path.isfile(expected)
    shutil.rmtree(scratch_dir)
Example #7
0
def init(name, path):
    """Initialise a project called ``name`` inside ``path``.

    If the target directory already exists, the work is delegated to
    ``safe_init``. Otherwise the directory is created, set as the project
    path and the project is initialised.
    """
    project_path = os.path.join(path, name)

    if not os.path.isdir(project_path):
        # Fresh location: create it and initialise straight away.
        ProjectManager.create_dir(project_path)
        ProjectManager.set_path(project_path)
        ProjectManager.init_project()
        return

    # Existing directory: hand over to safe_init.
    safe_init(project_path)
Example #8
0
def scrape(monitor, source):
    """Call ``main`` with the project's supervisor configuration file.

    The project path is set to the current directory, then ``main`` is
    invoked with ``-c`` followed by the 'supervisor' config value joined
    onto ``ProjectManager.KRYPTOFLOW_DIR``.

    Parameters
    ----------
    monitor: bool
        Start supervisord monitoring server. In this block it only causes
        ``'monitoring'`` to be printed once ``main`` has returned.

    source: str
        'all', 'gdax', 'reddit', 'twitter'. Not read by this block.

    Returns
    -------
    None
    """
    ProjectManager.set_path('.')

    supervisor_conf = os.path.join(ProjectManager.KRYPTOFLOW_DIR,
                                   ProjectManager.get_value('supervisor'))
    main(['-c', supervisor_conf])

    if not monitor:
        return
    print('monitoring')
Example #9
0
        'side': 'buy'
    }, {
        'price': 6379.99,
        'ts': '2018-08-21 06:21:57',
        'volume_24h': 8347.08598465,
        'side': 'sell'
    }, {
        'price': 6380.0,
        'ts': '2018-08-21 06:22:00',
        'volume_24h': 8347.21550695,
        'side': 'buy'
    }, {
        'price': 6380.28,
        'ts': '2018-08-21 06:22:00',
        'volume_24h': 8347.21668217,
        'side': 'buy'
    }, {
        'price': 6380.46,
        'ts': '2018-08-21 06:22:04',
        'volume_24h': 8347.06809264,
        'side': 'buy'
    }]

    from kryptoflow.managers.project import ProjectManager
    ProjectManager.set_path('tests/test-project')
    # print(ResourceManager.get_latest_backed_up_time())
    ResourceManager._do_backup(test_data[0:5])
    dm = DataManager
    dm.do_backup(chunk=2, stream_from=test_data)
    print(os.listdir('tests/test-project/kafka/backups'))
Example #10
0
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.reddit import RedditStreamer
from kryptoflow.managers.project import ProjectManager

if __name__ == '__main__':
    # Build a producer for the 'reddit' topic from the project's Kafka
    # schemas and hand it, with the Reddit secrets, to the streamer.
    kafka_config = ProjectManager.get_value('kafka')
    schemas = kafka_config['schemas']
    secrets = ProjectManager.get_secrets('reddit')

    sink = KafkaStream.avro_producer(schemas=schemas, topic='reddit')
    streamer = RedditStreamer(producer=sink, reddit_config=secrets)
    streamer.start_stream()
Example #11
0
def project_manager():
    """Return a ``ProjectManager`` instance set up for the test project.

    The instance has its path set to ``'tests/test-project'`` and its
    configuration loaded through ``set_config(init=False)``.
    """
    manager = ProjectManager()
    manager.set_path('tests/test-project')
    manager.set_config(init=False)
    return manager
Example #12
0
from kafka_tfrx.stream import KafkaStream
from kryptoflow.scrapers.gdax_ws import GDAXClient
from kryptoflow.managers.project import ProjectManager

if __name__ == '__main__':
    # Build a producer for the 'gdax' topic on localhost and stream the
    # BTC-USD ticker channel into it.
    kafka_config = ProjectManager.get_value('kafka')
    schemas = kafka_config['schemas']

    sink = KafkaStream.avro_producer(topic='gdax', ip='localhost', schemas=schemas)
    client = GDAXClient(products=['BTC-USD'], channels=['ticker'], producer=sink)
    client.start_stream()
Example #13
0
def test_get_secrets():
    """Check that the 'reddit' secrets are a dict containing ``client_id``."""
    ProjectManager.set_path('tests/test-project')

    secrets = ProjectManager.get_secrets('reddit')
    assert isinstance(secrets, dict)

    # Looked up a second time, as two independent checks.
    secret_names = ProjectManager.get_secrets('reddit').keys()
    assert 'client_id' in secret_names
Example #14
0
def test_project():
    """Check that ``get_models`` raises when no project path is set."""
    # Clear the project path first so the lookup has nothing to work with.
    ProjectManager.set_path(None)

    with pytest.raises(NotInitilizedError):
        ProjectManager.get_models()
Example #15
0
            Key=format_backup('backups', stream[0]))
        _logger.info(f'Backup saved to S3 for data starting at: {stream[0]["ts"]}')

    def get_last(self):
        """Return the ``'ts'`` found at the end of the newest backup object.

        The bucket listing is ordered by the timestamp encoded in each
        object's file name, the newest object is downloaded and decoded as
        UTF-8 JSON, and the trailing ``'ts'`` value is parsed with the
        ``'%Y-%m-%d %H:%M:%S'`` format.

        Returns
        -------
        datetime

        Raises
        ------
        KeyError
            If the listing response has no ``'Contents'`` entry.
        """
        listing = self.client.list_objects(Bucket=self.bucket)['Contents']
        newest = sorted(listing,
                        key=lambda entry: fname_to_datetime(fname_from_object(entry)),
                        reverse=True)[0]
        # Listing entries are mappings (fname_from_object subscripts them with
        # 'Key'), so they cannot fetch themselves with a zero-argument
        # ``.get()``; the body has to be requested from the client by key.
        # NOTE(review): assumes self.client is a boto3 S3 client - confirm.
        body = self.client.get_object(Bucket=self.bucket, Key=newest['Key'])['Body']
        event = json.loads(body.read().decode('utf-8'))[-1]
        # NOTE(review): the decoded payload is indexed with [-1] twice. If a
        # backup is a flat JSON list of messages, the second [-1] will fail;
        # confirm against the code that writes the backup.
        return datetime.strptime(event[-1]['ts'], '%Y-%m-%d %H:%M:%S')


def fname_from_object(object_path):
    """Return the file-name component of the entry's ``'Key'`` value."""
    key = object_path['Key']
    _, file_name = os.path.split(key)
    return file_name


def format_backup(path, message):
    """Return ``path`` joined with ``message['ts']`` plus a ``.json`` suffix."""
    file_name = message['ts'] + '.json'
    return os.path.join(path, file_name)


def fname_to_datetime(fname):
    """Parse the part of ``fname`` before its first dot as a timestamp.

    The expected format is ``'%Y-%m-%d %H:%M:%S'``.
    """
    stem, _, _ = fname.partition('.')
    return datetime.strptime(stem, '%Y-%m-%d %H:%M:%S')


if __name__ == '__main__':
    # Manual check: point the project at 'kryptoflow/template', resolve the
    # backup resources, then show the latest backups.
    ProjectManager.set_path('kryptoflow/template')
    rm = ResourceManager.backup_resources()

    latest_backups = ResourceManager.get_latest_backups()
    print(latest_backups)
Example #16
0
from kafka_tfrx.stream import KafkaStream

from kryptoflow.scrapers.twitter import TwitterStream
from kryptoflow.managers.project import ProjectManager


if __name__ == '__main__':
    # Build a producer from the project's Kafka schemas and hand it, with
    # the Twitter secrets, to the tweet stream.
    schemas = ProjectManager.get_value('kafka')['schemas']
    secrets = ProjectManager.get_secrets('twitter')

    # Publish tweets to their own 'twitter' topic. The previous value,
    # 'reddit', looked like a copy-paste from the Reddit scraper and would
    # mix tweets into the Reddit topic.
    # NOTE(review): confirm a 'twitter' entry exists in the Kafka schemas.
    sink = KafkaStream.avro_producer(schemas=schemas, topic='twitter')
    tweet_stream = TwitterStream(producer=sink, twitter_config=secrets)
    tweet_stream.start()