def bootstrap():
    """
    Bootstraps the data provider unix service.

    It instantiates the Configuration Manager, Dataset Manager,
    Communication Manager, the Execution Pipeline (scheduler) and the
    Blockchain Gateway, starts the scheduler's and the gateway's crons on
    separate threads, and then blocks until the scheduler thread ends.

    Raises:
        Exception: whatever ``ipfsapi.connect`` raised. The error is logged
            and re-raised unchanged.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import logging`` being present.
    import logging

    # 1. Set up Configuration Manager.
    config_manager = ConfigurationManager()
    config_manager.bootstrap()

    # 2. Set up the IPFS Client used by the service.
    config = config_manager.get_config()
    try:
        client = ipfsapi.connect(
            config.get('BLOCKCHAIN', 'host'),
            config.getint('BLOCKCHAIN', 'ipfs_port'),
        )
    except Exception:
        # Record the failure, then use a bare ``raise`` so the original
        # exception and traceback propagate untouched.
        logging.getLogger(__name__).exception(
            "Could not connect to the IPFS daemon"
        )
        raise

    # 3. Set up Dataset Manager.
    dataset_manager = DatasetManager(config_manager=config_manager)
    dataset_manager.configure(ipfs_client=client)

    # 4. Set up the Communication Manager.
    communication_manager = CommunicationManager()

    # 5. Set up the Execution Pipeline (Scheduler, Runners)
    #    and run the Scheduler's cron on a new thread.
    scheduler = DMLScheduler(config_manager=config_manager)
    scheduler.configure(
        communication_manager=communication_manager,
        ipfs_client=client,
    )
    # NOTE(review): 0.05 is the only argument handed to start_cron;
    # presumably a polling period in seconds -- confirm against DMLScheduler.
    t1 = threading.Thread(
        target=scheduler.start_cron,
        args=(0.05,),
        daemon=False,
    )
    t1.start()

    # 6. Configure the Communication Manager with the components it talks to.
    communication_manager.configure(scheduler=scheduler)

    # 7. Set up Blockchain Gateway and start listening on a new thread.
    blockchain_gateway = BlockchainGateway()
    blockchain_gateway.configure(
        config_manager=config_manager,
        communication_manager=communication_manager,
        ipfs_client=client,
    )
    t2 = threading.Thread(
        target=blockchain_gateway.start_cron,
        args=(0.05,),
        daemon=False,
    )
    t2.start()

    # 8. Wait for the scheduler thread to end. Only t1 is joined here; t2 is
    #    a non-daemon thread, so the interpreter will not exit while it is
    #    still running.
    # TODO: Need to make it work as a daemon.
    t1.join()
def setup_client(config_manager, client):
    """
    Build and wire together the client-side components.

    Creates a CommunicationManager, a BlockchainGateway, a DMLScheduler and
    a bootstrapped DatasetManager, configures them against each other, and
    returns the tuple (communication_manager, blockchain_gateway, scheduler).
    """
    # Instantiate everything first; wiring happens afterwards.
    comm = CommunicationManager()
    gateway = BlockchainGateway()
    sched = DMLScheduler(config_manager)
    datasets = DatasetManager(config_manager)
    datasets.bootstrap()

    # Connect the components to one another.
    comm.configure(sched, datasets)
    gateway.configure(config_manager, comm, client)
    sched.configure(comm, client)

    return comm, gateway, sched
def test_bad_format(bad_config_manager_format):
    """
    Test that DSM invalidates dataset with invalid CSV format.

    Constructing a DatasetManager from ``bad_config_manager_format`` must
    raise an exception whose message equals the expected formatting error.
    """
    format_message = ("The file {file} in folder {folder} was improperly "
                      "formatted. Please refer to the following error "
                      "message from pandas for more information: {message}")
    format_message = format_message.format(
        file='test1.csv',
        folder='test1',
        message='list index out of range',
    )
    try:
        DatasetManager(bad_config_manager_format)
    except Exception as e:
        assert str(e) == format_message, "Wrong assertion was thrown!"
    else:
        # Raised outside the ``try`` so the handler above cannot swallow it
        # and report a misleading "Wrong assertion was thrown!" instead.
        raise AssertionError("Error should have been thrown for bad format")
def test_no_header(bad_config_manager_header):
    """
    Check that DSM rejects a dataset whose file has no header.

    Building a DatasetManager from ``bad_config_manager_header`` is expected
    to raise an AssertionError carrying the "no header" message.
    """
    template = "No header has been provided in file {file} in folder {folder}"
    expected = template.format(file='test1.csv', folder='test1')
    try:
        DatasetManager(bad_config_manager_header)
    except AssertionError as caught:
        assert str(caught) == expected, "Wrong assertion was thrown!"
    else:
        # Construction succeeded, which means the validation did not fire.
        raise Exception("Error should have been thrown for lack of header")
def good_dataset_manager(good_config_manager):
    """
    Return a DatasetManager constructed from ``good_config_manager``.
    """
    manager = DatasetManager(good_config_manager)
    return manager
def dataset_manager(config_manager):
    """
    Return a DatasetManager built from ``config_manager`` after calling its
    ``bootstrap()`` method.
    """
    # Local name differs from the function name to avoid shadowing it.
    manager = DatasetManager(config_manager)
    manager.bootstrap()
    return manager