def test_generate_metadata(self):
    # Verifies the metadata tree that PackageParser.get_package_metadata
    # returns for the "<package_name>.tar.gz" archive.
    package_name = "test_package-1.0.2"

    # Expected components, grouped by component type.
    spark_streaming_components = {
        "componentC": {
            "component_detail": {
                "properties.json": {
                    "property1": "1",
                    "property2": "two",
                },
            },
            "component_path": "test_package-1.0.2/sparkStreaming/componentC",
            "component_name": "componentC",
        },
    }
    oozie_components = {
        "componentA": {
            "component_detail": {
                "properties.json": {
                    "property3": "3",
                    "property4": "four",
                },
            },
            "component_path": "test_package-1.0.2/oozie/componentA",
            "component_name": "componentA",
        },
        "componentB": {
            "component_detail": {
                "hdfs.json": {},
                "properties.json": {},
            },
            "component_path": "test_package-1.0.2/oozie/componentB",
            "component_name": "componentB",
        },
    }
    expected_metadata = {
        "component_types": {
            "sparkStreaming": spark_streaming_components,
            "oozie": oozie_components,
        },
        "package_name": "test_package-1.0.2",
    }

    parser = PackageParser()
    self.assertEqual(
        parser.get_package_metadata("%s.tar.gz" % package_name),
        expected_metadata)
def test_generate_metadata(self):
    # The parser is given "<package_name>.tar.gz" and must return exactly
    # the nested metadata structure spelled out below.
    package_name = "test_package-1.0.2"
    archive_name = "%s.tar.gz" % package_name

    expected_metadata = {
        "component_types": {
            "sparkStreaming": {
                "componentC": {
                    "component_detail": {
                        "properties.json": {"property1": "1", "property2": "two"}
                    },
                    "component_path": "test_package-1.0.2/sparkStreaming/componentC",
                    "component_name": "componentC"
                }
            },
            "oozie": {
                "componentA": {
                    "component_detail": {
                        "properties.json": {"property3": "3", "property4": "four"}
                    },
                    "component_path": "test_package-1.0.2/oozie/componentA",
                    "component_name": "componentA"
                },
                "componentB": {
                    "component_detail": {"hdfs.json": {}, "properties.json": {}},
                    "component_path": "test_package-1.0.2/oozie/componentB",
                    "component_name": "componentB"
                }
            }
        },
        "package_name": "test_package-1.0.2"
    }

    parser = PackageParser()
    actual_metadata = parser.get_package_metadata(archive_name)
    self.assertEqual(actual_metadata, expected_metadata)
class DeploymentManager(object):
    """Coordinates package deployment and application lifecycle operations.

    Public methods authorize the calling user through ``self._authorizer``
    before acting. Long-running operations (deploy, undeploy, create, start,
    stop, delete) record an in-progress state in ``self._package_progress``
    under ``self._lock`` and then run on ``self.dispatcher``.
    """

    def __init__(self, repository, package_registrar, application_registrar,
                 application_summary_registrar, environment, config):
        """Store the collaborators and create the background dispatcher.

        :param repository: source of downloadable packages
        :param package_registrar: store of deployed package data and status
        :param application_registrar: store of application records
        :param application_summary_registrar: store of application summaries
        :param environment: environment description; must contain 'namespace'
        :param config: configuration mapping; must contain a positive integer
            'deployer_thread_limit' and the callback endpoint keys
        """
        self._repository = repository
        self._package_registrar = package_registrar
        self._application_registrar = application_registrar
        self._environment = environment
        self._config = config
        self._application_creator = application_creator.ApplicationCreator(
            config, environment, environment['namespace'])
        self._application_summary_registrar = application_summary_registrar
        self._package_parser = PackageParser()
        # maps package/application name -> state of the operation in progress
        self._package_progress = {}
        # re-entrant because the progress helpers take the lock while callers
        # may already be holding it
        self._lock = threading.RLock()
        self._authorizer = authorizer_local.AuthorizerLocal()
        # load number of threads from config file:
        number_of_threads = self._config["deployer_thread_limit"]
        assert isinstance(number_of_threads, (int))
        assert number_of_threads > 0
        self.dispatcher = AsyncDispatcher(num_threads=number_of_threads)
        self.rest_client = requests

    def _get_groups(self, user):
        """Return the names of the OS groups that ``user`` belongs to.

        The result contains every group listing the user as a member plus the
        user's primary group. An empty/None user yields an empty list.

        :raises Forbidden: if the user or group details cannot be looked up
        """
        groups = []
        if user:
            try:
                groups = [
                    g.gr_name for g in grp.getgrall() if user in g.gr_mem
                ]
                # The primary group is not necessarily listed in gr_mem, so
                # it is resolved separately. The original only did this when
                # the gid was falsy (i.e. 0), which dropped the primary group
                # for every other user.
                primary_gid = pwd.getpwnam(user).pw_gid
                primary_group = grp.getgrgid(primary_gid).gr_name
                if primary_group not in groups:
                    groups.append(primary_group)
            except Exception:
                # 'except Exception' rather than a bare except so that
                # KeyboardInterrupt/SystemExit are not turned into Forbidden
                raise Forbidden('Failed to find details for user "%s"' % user)
        return groups

    def _authorize(self, user_name, resource_type, resource_owner,
                   action_name):
        """Ask the authorizer whether ``user_name`` may perform an action.

        :param user_name: the user requesting the action
        :param resource_type: the type of resource being acted on
        :param resource_owner: the owner of the resource, or None
        :param action_name: the action; qualified as '<type>:<action>'
        :raises Forbidden: if the authorizer rejects the request
        """
        qualified_action = '%s:%s' % (resource_type, action_name)
        identity = {'user': user_name, 'groups': self._get_groups(user_name)}
        resource = {'type': resource_type, 'owner': resource_owner}
        action = {'name': qualified_action}
        if not self._authorizer.authorize(identity, resource, action):
            raise Forbidden('User "%s" does not have authorization for "%s"' %
                            (user_name, qualified_action))

    def get_environment(self, user_name):
        """Return the environment description after a READ authorization."""
        self._authorize(user_name, Resources.ENVIRONMENT, None, Actions.READ)
        return self._environment

    def list_packages(self, user_name):
        """Return the packages known to the package registrar."""
        self._authorize(user_name, Resources.PACKAGES, None, Actions.READ)
        logging.info('list_deployed')
        deployed = self._package_registrar.list_packages()
        return deployed

    def _assert_package_status(self, package, required_status):
        """Raise unless ``package`` currently has ``required_status``.

        :raises NotFound: if the package is not deployed
        :raises ConflictingState: if it is in any other unexpected state
        """
        status = self.get_package_info(package)['status']
        if status != required_status:
            if status == PackageDeploymentState.NOTDEPLOYED:
                raise NotFound(json.dumps({'status': status}))
            else:
                raise ConflictingState(json.dumps({'status': status}))

    def list_repository(self, recency, user_name):
        """Return the package list the repository reports for ``recency``."""
        self._authorize(user_name, Resources.REPOSITORY, None, Actions.READ)
        logging.info("list_available: %s", recency)
        available = self._repository.get_package_list(user_name, recency)
        return available

    def _get_saved_package_data(self, package):
        """Look up what the package registrar has stored for ``package``.

        :return: tuple ``(owner, exists, metadata)``; owner and metadata are
            None and exists is False when the registrar has no such package
        """
        package_owner = None
        package_exists = False
        package_metadata = None
        if self._package_registrar.package_exists(package):
            package_metadata = self._package_registrar.get_package_metadata(
                package)
            logging.debug(package_metadata)
            package_owner = package_metadata['metadata']['user']
            package_exists = True
        return package_owner, package_exists, package_metadata

    def _get_package_owner(self, package):
        """Return the stored owner of ``package``, or None if not stored."""
        package_owner, _, _ = self._get_saved_package_data(package)
        return package_owner

    def _get_application_owner(self, application):
        """Return the user recorded in the application's overrides, or None
        when the application registrar has no record of it."""
        application_owner = None
        if self._application_registrar.application_has_record(application):
            application_owner = self._application_registrar.get_application(
                application)['overrides']['user']
        return application_owner

    def get_package_info(self, package, user_name=None):
        """Describe the current state of ``package``.

        :param package: package identifier in the form '<name>-<version>'
        :param user_name: when given, a READ authorization is performed first
        :return: dict with keys name, version, status, user, defaults and
            information
        """
        package_owner, package_exists, metadata = self._get_saved_package_data(
            package)
        if user_name is not None:
            self._authorize(user_name, Resources.PACKAGES, package_owner,
                            Actions.READ)

        # name/version as derived from the identifier; used whenever stored
        # metadata is not consulted
        parsed_name, _, parsed_version = package.rpartition('-')

        information = None
        progress_state = self._get_package_progress(package)
        if progress_state is not None:
            # an operation is in flight: report its state
            properties = None
            status = progress_state
            name = parsed_name
            version = parsed_version
        else:
            # package deploy is not in progress:
            # get last package status from database
            deploy_status = self._package_registrar.get_package_deploy_status(
                package)
            if deploy_status:
                status = deploy_status["state"]
                information = deploy_status["information"]
            # check if package data exists in database:
            if package_exists:
                properties = self._package_parser.properties_from_metadata(
                    metadata['metadata'])
                status = PackageDeploymentState.DEPLOYED
                name = metadata['name']
                version = metadata['version']
            else:
                if not deploy_status:
                    status = PackageDeploymentState.NOTDEPLOYED
                properties = None
                name = parsed_name
                version = parsed_version

        ret = {
            "name": name,
            "version": version,
            "status": status,
            "user": package_owner,
            "defaults": properties,
            "information": information
        }
        return ret

    def _run_asynch_package_task(self, package_name, initial_state,
                                 working_state, task, auth_check):
        """
        Manages locks and state reporting for async background operations on packages
        :param package_name: The name of the package to operate on
        :param initial_state: The state to check before beginning work on the package
        :param working_state: The state to set while the package operation is being carried out.
        :param task: The actual work to be carried out
        :param auth_check: Callable run (under the lock, after the state check) that
                           raises if the caller is not authorized
        """
        with self._lock:
            # check that package is in the right state before starting operation:
            self._assert_package_status(package_name, initial_state)
            auth_check()
            # set the operation state before starting:
            self._set_package_progress(package_name, working_state)

        # this will be run in the background while taking care to release all locks and intermediate states:
        def do_work_and_report_progress():
            try:
                # report beginning of work to external APIs:
                self._state_change_event_package(package_name)
                # do the actual work:
                task()
            finally:
                # release the lock on the package:
                self._clear_package_progress(package_name)
                # report completion to external APIs
                self._state_change_event_package(package_name)

        # run everything on a background thread:
        self.dispatcher.run_as_asynch(task=do_work_and_report_progress)

    def deploy_package(self, package, user_name):
        """Download ``package`` from the repository and register it.

        The work runs in the background; the outcome is persisted through the
        package registrar's deploy status.
        """
        def auth_check():
            self._authorize(user_name, Resources.PACKAGE, None, Actions.DEPLOY)

        # this function will be executed in the background:
        def _do_deploy():
            # if this value is not changed, then it is assumed that the operation never completed
            package_data_path = None
            # stays None only if something other than an Exception aborted
            # the work, in which case there is no status to persist
            deploy_status = None
            try:
                package_file = package + '.tar.gz'
                logging.info("deploy: %s", package)
                # download package:
                package_data_path = self._repository.get_package(
                    package_file, user_name)
                # put package in database:
                metadata = self._package_parser.get_package_metadata(
                    package_data_path)
                self._application_creator.validate_package(package, metadata)
                self._package_registrar.set_package(package, package_data_path,
                                                    user_name)
                # set the operation status as complete
                deploy_status = {
                    "state": PackageDeploymentState.DEPLOYED,
                    "information": "Deployed " + package + " at " + self.utc_string()
                }
                logging.info("deployed: %s", package)
            except Exception as ex:
                logging.error(str(ex))
                error_message = "Error deploying " + package + " " + str(
                    type(ex).__name__) + ", details: " + json.dumps(str(ex))
                deploy_status = {
                    "state": PackageDeploymentState.NOTDEPLOYED,
                    "information": error_message
                }
                raise
            finally:
                # report final state of operation to database:
                if deploy_status is not None:
                    self._package_registrar.set_package_deploy_status(
                        package, deploy_status)
                if package_data_path is not None:
                    os.remove(package_data_path)

        # schedule work to be done in the background:
        self._run_asynch_package_task(
            package_name=package,
            initial_state=PackageDeploymentState.NOTDEPLOYED,
            working_state=PackageDeploymentState.DEPLOYING,
            task=_do_deploy,
            auth_check=auth_check)

    def utc_string(self):
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.datetime.utcnow().isoformat()

    def undeploy_package(self, package, user_name):
        """Delete a deployed ``package`` from the package registrar.

        The work runs in the background; a failure is persisted as deploy
        status information and the package stays in the DEPLOYED state.
        """
        def auth_check():
            package_owner = self._get_package_owner(package)
            self._authorize(user_name, Resources.PACKAGE, package_owner,
                            Actions.UNDEPLOY)

        # this function will be executed in the background:
        def do_undeploy():
            deploy_status = None
            try:
                logging.info("undeploy: %s", package)
                self._package_registrar.delete_package(package)
                logging.info("undeployed: %s", package)
            except Exception as ex:
                # log error to screen:
                logging.error(str(ex))
                # prepare human readable message
                error_message = "Error undeploying " + package + " " + str(
                    type(ex).__name__) + ", details: " + json.dumps(str(ex))
                # set the status:
                deploy_status = {
                    "state": PackageDeploymentState.DEPLOYED,
                    "information": error_message
                }
                raise
            finally:
                if deploy_status is not None:
                    # persist any errors in the database, but still throw them:
                    self._package_registrar.set_package_deploy_status(
                        package, deploy_status)

        # schedule work to be done in the background:
        self._run_asynch_package_task(
            package_name=package,
            initial_state=PackageDeploymentState.DEPLOYED,
            working_state=PackageDeploymentState.UNDEPLOYING,
            task=do_undeploy,
            auth_check=auth_check)

    def _set_package_progress(self, package_name, state):
        """
        Marks the progress of background operations being run on the app.
        :param package_name: the name of the package to be modified
        :param state: the state of the background operation
        """
        # currently we are using multiple threads, so this lock is added for thread safety
        with self._lock:
            self._package_progress[package_name] = state

    def _get_package_progress(self, package_name):
        """
        :param package_name: The name of the package for which to query progress
        :return: the state of the package, or None if no operation is in progress
        """
        with self._lock:
            if self._is_package_in_progress(package_name):
                return self._package_progress[package_name]
            return None

    def _is_package_in_progress(self, package_name):
        """
        checks if the current package has an operation in progress
        :param package_name: the name of the package to check
        :return: true if the package is currently being operated on
        """
        with self._lock:
            return package_name in self._package_progress

    def _clear_package_progress(self, package):
        """Forget any in-progress state for ``package``; a no-op if none is set."""
        with self._lock:
            self._package_progress.pop(package, None)

    def _mark_destroying(self, package):
        """Record that the named application is being destroyed."""
        self._set_package_progress(package, ApplicationState.DESTROYING)

    def _mark_creating(self, package):
        """Record that the named application is being created."""
        self._set_package_progress(package, ApplicationState.CREATING)

    def _mark_starting(self, package):
        """Record that the named application is being started."""
        self._set_package_progress(package, ApplicationState.STARTING)

    def _mark_stopping(self, package):
        """Record that the named application is being stopped."""
        self._set_package_progress(package, ApplicationState.STOPPING)

    def list_package_applications(self, package, user_name):
        """Return the applications the registrar lists for ``package``."""
        self._authorize(user_name, Resources.APPLICATIONS, None, Actions.READ)
        logging.info('list_package_applications')
        applications = self._application_registrar.list_applications_for_package(
            package)
        return applications

    def list_applications(self, user_name):
        """Return every application the registrar lists."""
        self._authorize(user_name, Resources.APPLICATIONS, None, Actions.READ)
        logging.info('list_applications')
        applications = self._application_registrar.list_applications()
        return applications

    def _assert_application_status(self, application, required_status):
        """Raise unless ``application`` is in an acceptable state.

        :param required_status: a single state, or a list of acceptable states
        :raises NotFound: if the application has not been created
        :raises ConflictingState: if it is in any other unexpected state
        """
        logging.debug("Checking %s is %s", application,
                      json.dumps(required_status))
        app_info = self.get_application_info(application)
        status = app_info['status']
        logging.debug("Found %s is %s", application, status)

        if isinstance(required_status, list):
            mismatch = status not in required_status
        else:
            mismatch = status != required_status

        if mismatch:
            if status == ApplicationState.NOTCREATED:
                raise NotFound(json.dumps({'status': status}))
            else:
                raise ConflictingState(json.dumps({'status': status}))
        logging.debug("Status for %s is OK", application)

    def _assert_application_exists(self, application):
        """Raise NotFound if ``application`` has not been created."""
        status = self.get_application_info(application)['status']
        if status == ApplicationState.NOTCREATED:
            raise NotFound(json.dumps({'status': status}))

    def start_application(self, application, user_name):
        """Start a CREATED application in the background."""
        logging.info('start_application')
        with self._lock:
            self._assert_application_status(application,
                                            ApplicationState.CREATED)
            application_owner = self._get_application_owner(application)
            self._authorize(user_name, Resources.APPLICATION,
                            application_owner, Actions.START)
            self._mark_starting(application)

        def do_work_start():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(
                        application)
                    self._application_creator.start_application(
                        application, create_data)
                    self._application_registrar.set_application_status(
                        application, ApplicationState.STARTED)
                except Exception as ex:
                    # on failure the application goes back to CREATED
                    self._handle_application_error(application, ex,
                                                   ApplicationState.CREATED,
                                                   "starting")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work_start)

    def stop_application(self, application, user_name):
        """Stop a STARTED application in the background."""
        logging.info('stop_application')
        with self._lock:
            self._assert_application_status(application,
                                            ApplicationState.STARTED)
            application_owner = self._get_application_owner(application)
            self._authorize(user_name, Resources.APPLICATION,
                            application_owner, Actions.STOP)
            self._mark_stopping(application)

        def do_work_stop():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(
                        application)
                    self._application_creator.stop_application(
                        application, create_data)
                    self._application_registrar.set_application_status(
                        application, ApplicationState.CREATED)
                except Exception as ex:
                    # on failure the application stays STARTED
                    self._handle_application_error(application, ex,
                                                   ApplicationState.STARTED,
                                                   "stopping")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work_stop)

    def get_application_info(self, application, user_name=None):
        """Return the stored record for ``application``.

        An in-progress operation state, if any, overrides the stored status.
        An application without a record is reported as NOTCREATED.

        :param user_name: when given, a READ authorization is performed first
        """
        if user_name is not None:
            application_owner = self._get_application_owner(application)
            self._authorize(user_name, Resources.APPLICATION,
                            application_owner, Actions.READ)
        logging.info('get_application_info')

        if not self._application_registrar.application_has_record(application):
            record = {
                'status': ApplicationState.NOTCREATED,
                'information': None
            }
        else:
            record = self._application_registrar.get_application(application)

        progress_state = self._get_package_progress(application)
        if progress_state is not None:
            record['status'] = progress_state
        return record

    def get_application_detail(self, application, user_name):
        """Return the runtime details of an existing application, with its
        current status and name added.

        :raises NotFound: if the application has not been created
        """
        application_owner = self._get_application_owner(application)
        self._authorize(user_name, Resources.APPLICATION, application_owner,
                        Actions.READ)
        logging.info('get_application_detail')
        self._assert_application_exists(application)
        create_data = self._application_registrar.get_create_data(application)
        record = self._application_creator.get_application_runtime_details(
            application, create_data)
        record['status'] = self.get_application_info(application)['status']
        record['name'] = application
        return record

    def get_application_summary(self, application, user_name):
        """Return the summary data stored for ``application``."""
        application_owner = self._get_application_owner(application)
        self._authorize(user_name, Resources.APPLICATION, application_owner,
                        Actions.READ)
        logging.info('get_application_summary')
        record = self._application_summary_registrar.get_summary_data(
            application)
        return record

    # XXXX
    def get_pod_logs(self, pod_name, namespace_id):
        """Read the log of the pod named '<pod_name>-driver'.

        :param pod_name: base name of the pod; '-driver' is appended
        :param namespace_id: namespace the pod lives in
        :return: the log text, or None if the API call fails
        """
        config.load_incluster_config()
        logging.info('Inside pod log121 *******%s', pod_name)
        try:
            configuration = client.Configuration()
            api_client = client.ApiClient(configuration)
            api_instance = client.CoreV1Api(api_client)
            api_response = api_instance.read_namespaced_pod_log(
                name=str(pod_name) + "-driver", namespace=namespace_id)
            logging.info('Inside pod log *******%s', pod_name)
            logging.info(api_response)
            return api_response
        except ApiException as ex:
            # best effort: callers receive None, but the failure is recorded
            # in the log instead of being printed without any detail
            logging.error('Exception in getting logs for pod %s-driver: %s',
                          pod_name, ex)
            return None

    def get_pod_state(self, pod_name, namespace_id):
        """Read the status phase of the pod named '<pod_name>-driver'.

        :param pod_name: base name of the pod; '-driver' is appended
        :param namespace_id: namespace the pod lives in
        :return: the pod's status phase, or None if the API call fails
        """
        config.load_incluster_config()
        logging.info('Inside pod state before try *******%s', pod_name)
        try:
            configuration = client.Configuration()
            api_client = client.ApiClient(configuration)
            api_instance = client.CoreV1Api(api_client)
            api_response_state = api_instance.read_namespaced_pod_status(
                name=str(pod_name) + "-driver", namespace=namespace_id)
            logging.info('Inside pod state *******%s', pod_name)
            return api_response_state.status.phase
        except ApiException as ex:
            # best effort: callers receive None, but the failure is recorded
            # in the log instead of being printed without any detail
            logging.error('Exception in getting status for pod %s-driver: %s',
                          pod_name, ex)
            return None

    def get_application_log(self, application, user_name):
        """Return the driver pod log of ``application`` from the 'pnda'
        namespace (None if it cannot be read)."""
        application_owner = self._get_application_owner(application)
        self._authorize(user_name, Resources.APPLICATION, application_owner,
                        Actions.READ)
        logging.info('get_application_log')
        record = self.get_pod_logs(application, 'pnda')
        return record

    def get_application_state(self, application, user_name):
        """Return the driver pod phase of ``application`` from the 'pnda'
        namespace (None if it cannot be read)."""
        application_owner = self._get_application_owner(application)
        self._authorize(user_name, Resources.APPLICATION, application_owner,
                        Actions.READ)
        logging.info('get_application_state')
        record = self.get_pod_state(application, 'pnda')
        return record

    # XXXX
    def create_application(self, package, application, overrides, user_name):
        """Create ``application`` from a deployed ``package``.

        The caller needs READ on the package and CREATE on applications. The
        application record is registered synchronously; the actual creation
        runs in the background.

        :param package: identifier of a package in the DEPLOYED state
        :param application: name of the application to create
        :param overrides: property overrides validated against the package defaults
        :param user_name: the user requesting the creation
        """
        logging.info('create_application')
        package_data_path = None

        with self._lock:
            self._assert_application_status(application,
                                            ApplicationState.NOTCREATED)
            self._assert_package_status(package,
                                        PackageDeploymentState.DEPLOYED)
            # the owner of the *package* is what the package READ check needs;
            # the original looked the package name up as an application
            package_owner = self._get_package_owner(package)
            self._authorize(user_name, Resources.PACKAGE, package_owner,
                            Actions.READ)
            self._authorize(user_name, Resources.APPLICATION, None,
                            Actions.CREATE)
            defaults = self.get_package_info(package)['defaults']
            self._application_creator.assert_application_properties(
                overrides, defaults)
            package_data_path = self._package_registrar.get_package_data(
                package)
            try:
                self._application_registrar.create_application(
                    package, application, overrides, defaults)
                self._mark_creating(application)
            except Exception:
                # the background task, which normally removes the fetched
                # package data, will never run: clean it up here
                if package_data_path is not None:
                    os.remove(package_data_path)
                raise

        def do_work_create():
            try:
                self._state_change_event_application(application)
                try:
                    package_metadata = self._package_registrar.get_package_metadata(
                        package)['metadata']
                    create_data = self._application_creator.create_application(
                        package_data_path, package_metadata, application,
                        overrides)
                    self._application_registrar.set_create_data(
                        application, create_data)
                    self._application_registrar.set_application_status(
                        application, ApplicationState.CREATED)
                except Exception as ex:
                    self._handle_application_error(application, ex,
                                                   ApplicationState.NOTCREATED,
                                                   "creating")
                    # format_exc() takes an optional limit, not the exception;
                    # it reports the exception currently being handled
                    logging.error(traceback.format_exc())
                    raise
            finally:
                # clear inner locks:
                self._clear_package_progress(application)
                self._state_change_event_application(application)
                if package_data_path is not None:
                    os.remove(package_data_path)

        self.dispatcher.run_as_asynch(task=do_work_create)

    def _handle_application_error(self, application, ex, app_status,
                                  operation):
        """
        Use to handle application exceptions which should be relayed back to the user
        Sets the application state to an error
        :param application: The app for which to set the error
        :param ex: The error
        :param app_status: The status the app should be at following the error.
        :param operation: Word describing the failed operation, used in the message
        """
        # log error to screen:
        logging.error(str(ex))
        # prepare human readable message
        error_message = "Error %s " % operation + application + " " + str(
            type(ex).__name__) + ", details: " + json.dumps(str(ex))
        # set the status:
        self._application_registrar.set_application_status(
            application, app_status, error_message)

    def delete_application(self, application, user_name):
        """Destroy a CREATED or STARTED application in the background."""
        logging.info('delete_application')
        with self._lock:
            self._assert_application_status(
                application,
                [ApplicationState.CREATED, ApplicationState.STARTED])
            application_owner = self._get_application_owner(application)
            self._authorize(user_name, Resources.APPLICATION,
                            application_owner, Actions.DESTROY)
            self._mark_destroying(application)

        def do_work_delete():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(
                        application)
                    self._application_creator.destroy_application(
                        application, create_data)
                    self._application_registrar.delete_application(application)
                except Exception as ex:
                    self._handle_application_error(application, ex,
                                                   ApplicationState.STARTED,
                                                   "deleting")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work_delete)

    def _state_change_event_application(self, name):
        """Send the current state of application ``name`` to the
        'application_callback' endpoint."""
        endpoint_type = "application_callback"
        info = self.get_application_info(name)
        self._state_change_event(name, endpoint_type, info['status'],
                                 info['information'])

    def _state_change_event_package(self, name):
        """Send the current state of package ``name`` to the
        'package_callback' endpoint."""
        endpoint_type = "package_callback"
        info = self.get_package_info(name)
        self._state_change_event(name, endpoint_type, info['status'],
                                 info['information'])

    def _state_change_event(self, name, endpoint_type, state, information):
        """POST a state-change notification to the configured callback URL.

        Nothing is sent when the config value for ``endpoint_type`` is empty.

        :param name: id of the package or application that changed
        :param endpoint_type: config key holding the callback URL
        :param state: the state to report
        :param information: optional extra detail; included only when truthy
        """
        callback_url = self._config[endpoint_type]
        if callback_url:
            logging.debug("callback: %s %s %s", endpoint_type, name, state)
            callback_payload = {
                "data": [{
                    "id": name,
                    "state": state,
                    "timestamp": milli_time()
                }],
                "timestamp": milli_time()
            }
            # add additional optional information
            if information:
                callback_payload["data"][0]["information"] = information
            logging.debug(callback_payload)
            self.rest_client.post(callback_url, json=callback_payload)
class DeploymentManager(object): def __init__(self, repository, package_registrar, application_registrar, environment, config): self._repository = repository self._package_registrar = package_registrar self._application_registrar = application_registrar self._environment = environment self._config = config self._application_creator = application_creator.ApplicationCreator(config, environment, environment['namespace']) self._package_parser = PackageParser() self._package_progress = {} self._lock = threading.RLock() # load number of threads from config file: number_of_threads = self._config["deployer_thread_limit"] assert isinstance(number_of_threads, (int)) assert number_of_threads > 0 self.dispatcher = AsyncDispatcher(num_threads=number_of_threads) self.rest_client = requests def get_environment(self): return self._environment def list_packages(self): logging.info('list_deployed') deployed = self._package_registrar.list_packages() return deployed def _assert_package_status(self, package, required_status): status = self.get_package_info(package)['status'] if status != required_status: if status == PackageDeploymentState.NOTDEPLOYED: raise NotFound(json.dumps({'status': status})) else: raise ConflictingState(json.dumps({'status': status})) def list_repository(self, recency): logging.info("list_available: %s", recency) available = self._repository.get_package_list(recency) return available def get_package_info(self, package): information = None progress_state = self._get_package_progress(package) if progress_state is not None: properties = None status = progress_state name = package.rpartition('-')[0] version = package.rpartition('-')[2] else: # package deploy is not in progress: # get last package status from database deploy_status = self._package_registrar.get_package_deploy_status(package) if deploy_status: status = deploy_status["state"] information = deploy_status["information"] # check if package data exists in database: if 
self._package_registrar.package_exists(package): metadata = self._package_registrar.get_package_metadata(package) properties = self._package_parser.properties_from_metadata(metadata['metadata']) status = PackageDeploymentState.DEPLOYED name = metadata['name'] version = metadata['version'] else: if not deploy_status: status = PackageDeploymentState.NOTDEPLOYED properties = None name = package.rpartition('-')[0] version = package.rpartition('-')[2] ret = {"name": name, "version": version, "status": status, "defaults": properties, "information": information} return ret def _run_asynch_package_task(self, package_name, initial_state, working_state, task): """ Manages locks and state reporting for async background operations on packages :param package_name: The name of the package to operate on :param initial_state: The state to check before beginning work on the package :param working_state: The state to set while the package operation is being carried out. :param task: The actual work to be carried out """ with self._lock: # check that package is in the right state before starting operation: self._assert_package_status(package_name, initial_state) # set the operation state before starting: self._set_package_progress(package_name, working_state) # this will be run in the background while taking care to release all locks and intermediate states: def do_work_and_report_progress(): try: # report beginning of work to external APIs: self._state_change_event_package(package_name) # do the actual work: task() finally: # release the lock on the package: self._clear_package_progress(package_name) # report completion to external APIs self._state_change_event_package(package_name) # run everything on a background thread: self.dispatcher.run_as_asynch(task=do_work_and_report_progress) def deploy_package(self, package): # this function will be executed in the background: def _do_deploy(): # if this value is not changed, then it is assumed that the operation never completed try: 
package_file = package + '.tar.gz' logging.info("deploy: %s", package) # download package: package_data_path = self._repository.get_package(package_file) # put package in database: metadata = self._package_parser.get_package_metadata(package_data_path) self._application_creator.validate_package(package, metadata) self._package_registrar.set_package(package, package_data_path) # set the operation status as complete deploy_status = {"state": PackageDeploymentState.DEPLOYED, "information": "Deployed " + package + " at " + self.utc_string()} logging.info("deployed: %s", package) except Exception as ex: logging.error(str(ex)) error_message = "Error deploying " + package + " " + str(type(ex).__name__) + ", details: " + json.dumps(str(ex)) deploy_status = {"state": PackageDeploymentState.NOTDEPLOYED, "information": error_message} raise finally: # report final state of operation to database: self._package_registrar.set_package_deploy_status(package, deploy_status) os.remove(package_data_path) # schedule work to be done in the background: self._run_asynch_package_task(package_name=package, initial_state=PackageDeploymentState.NOTDEPLOYED, working_state=PackageDeploymentState.DEPLOYING, task=_do_deploy) def utc_string(self): return datetime.datetime.utcnow().isoformat() def undeploy_package(self, package): # this function will be executed in the background: def do_undeploy(): deploy_status = None try: logging.info("undeploy: %s", package) self._package_registrar.delete_package(package) logging.info("undeployed: %s", package) except Exception as ex: # log error to screen: logging.error(str(ex)) # prepare human readable message error_message = "Error undeploying " + package + " " + str(type(ex).__name__) + ", details: " + json.dumps(str(ex)) # set the status: deploy_status = {"state": PackageDeploymentState.DEPLOYED, "information": error_message} raise finally: if deploy_status is not None: # persist any errors in the database, but still throw them: 
self._package_registrar.set_package_deploy_status(package, deploy_status) # schedule work to be done in the background: self._run_asynch_package_task(package_name=package, initial_state=PackageDeploymentState.DEPLOYED, working_state=PackageDeploymentState.UNDEPLOYING, task=do_undeploy) def _set_package_progress(self, package_name, state): """ Marks the progress of background operations being run on the app. :param package_name: the name of the package to be modified :param state: the state of the background operation """ # currently we are using multiple threads, so this lock is added for thread saftey with self._lock: self._package_progress[package_name] = state def _get_package_progress(self, package_name): """ :param package_name: The name of the package for which to query progress :return: the state of the package """ with self._lock: if self._is_package_in_progress(package_name): return self._package_progress[package_name] return None def _is_package_in_progress(self, package_name): """ checks if the current package has an operation in progress :param package_name: the name of the package to check :return: true if the package is currently being operated on """ with self._lock: return package_name in self._package_progress def _clear_package_progress(self, package): with self._lock: self._package_progress.pop(package, None) def _mark_destroying(self, package): self._set_package_progress(package, ApplicationState.DESTROYING) def _mark_creating(self, package): self._set_package_progress(package, ApplicationState.CREATING) def _mark_starting(self, package): self._set_package_progress(package, ApplicationState.STARTING) def _mark_stopping(self, package): self._set_package_progress(package, ApplicationState.STOPPING) def list_package_applications(self, package): logging.info('list_package_applications') applications = self._application_registrar.list_applications_for_package(package) return applications def list_applications(self): logging.info('list_applications') 
applications = self._application_registrar.list_applications() return applications def _assert_application_status(self, application, required_status): app_info = self.get_application_info(application) status = app_info['status'] if (isinstance(required_status, list) and status not in required_status) \ or (not isinstance(required_status, list) and status != required_status): if status == ApplicationState.NOTCREATED: raise NotFound(json.dumps({'status': status})) else: raise ConflictingState(json.dumps({'status': status})) def _assert_application_exists(self, application): status = self.get_application_info(application)['status'] if status == ApplicationState.NOTCREATED: raise NotFound(json.dumps({'status': status})) def start_application(self, application): logging.info('start_application') with self._lock: self._assert_application_status(application, ApplicationState.CREATED) self._mark_starting(application) def do_work(): try: self._state_change_event_application(application) try: create_data = self._application_registrar.get_create_data(application) self._application_creator.start_application(application, create_data) self._application_registrar.set_application_status(application, ApplicationState.STARTED) except Exception as ex: self._handle_application_error(application, ex, ApplicationState.CREATED, "starting") raise finally: self._clear_package_progress(application) self._state_change_event_application(application) self.dispatcher.run_as_asynch(task=do_work) def stop_application(self, application): logging.info('stop_application') with self._lock: self._assert_application_status(application, ApplicationState.STARTED) self._mark_stopping(application) def do_work(): try: self._state_change_event_application(application) try: create_data = self._application_registrar.get_create_data(application) self._application_creator.stop_application(application, create_data) self._application_registrar.set_application_status(application, ApplicationState.CREATED) except 
Exception as ex: self._handle_application_error(application, ex, ApplicationState.STARTED, "stopping") raise finally: self._clear_package_progress(application) self._state_change_event_application(application) self.dispatcher.run_as_asynch(task=do_work) def get_application_info(self, application): logging.info('get_application_info') if not self._application_registrar.application_has_record(application): record = {'status': ApplicationState.NOTCREATED, 'information': None} else: record = self._application_registrar.get_application(application) progress_state = self._get_package_progress(application) if progress_state is not None: record['status'] = progress_state return record def get_application_detail(self, application): logging.info('get_application_detail') self._assert_application_exists(application) create_data = self._application_registrar.get_create_data(application) record = self._application_creator.get_application_runtime_details(application, create_data) record['status'] = self.get_application_info(application)['status'] record['name'] = application return record def create_application(self, package, application, overrides): logging.info('create_application') with self._lock: self._assert_application_status(application, ApplicationState.NOTCREATED) self._assert_package_status(package, PackageDeploymentState.DEPLOYED) defaults = self.get_package_info(package)['defaults'] package_data_path = self._package_registrar.get_package_data(package) self._application_registrar.create_application(package, application, overrides, defaults) self._mark_creating(application) def do_work(): try: self._state_change_event_application(application) try: package_metadata = self._package_registrar.get_package_metadata(package)['metadata'] create_data = self._application_creator.create_application( package_data_path, package_metadata, application, overrides) self._application_registrar.set_create_data(application, create_data) 
self._application_registrar.set_application_status(application, ApplicationState.CREATED) except Exception as ex: self._handle_application_error(application, ex, ApplicationState.NOTCREATED, "creating") logging.error(traceback.format_exc(ex)) raise finally: # clear inner locks: self._clear_package_progress(application) self._state_change_event_application(application) os.remove(package_data_path) self.dispatcher.run_as_asynch(task=do_work) def _handle_application_error(self, application, ex, app_status, operation): """ Use to handle application exceptions which should be relayed back to the user Sets the application state to an error :param application: The app for which to set the error :param ex: The error :param app_status: The status the app should be at following the error. """ # log error to screen: logging.error(str(ex)) # prepare human readable message error_message = "Error %s " % operation + application + " " + str(type(ex).__name__) + ", details: " + json.dumps(str(ex)) # set the status: self._application_registrar.set_application_status(application, app_status, error_message) def delete_application(self, application): logging.info('delete_application') with self._lock: self._assert_application_status(application, [ApplicationState.CREATED, ApplicationState.STARTED]) self._mark_destroying(application) def do_work(): try: self._state_change_event_application(application) try: create_data = self._application_registrar.get_create_data(application) self._application_creator.destroy_application(application, create_data) self._application_registrar.delete_application(application) except Exception as ex: self._handle_application_error(application, ex, ApplicationState.STARTED, "deleting") raise finally: self._clear_package_progress(application) self._state_change_event_application(application) self.dispatcher.run_as_asynch(task=do_work) def _state_change_event_application(self, name): endpoint_type = "application_callback" info = self.get_application_info(name) 
self._state_change_event(name, endpoint_type, info['status'], info['information']) def _state_change_event_package(self, name): endpoint_type = "package_callback" info = self.get_package_info(name) self._state_change_event(name, endpoint_type, info['status'], info['information']) def _state_change_event(self, name, endpoint_type, state, information): callback_url = self._config[endpoint_type] if callback_url: logging.debug("callback: %s %s %s", endpoint_type, name, state) callback_payload = { "data": [ { "id": name, "state": state, "timestamp": milli_time() } ], "timestamp": milli_time() } # add additional optional information if information: callback_payload["data"][0]["information"] = information logging.debug(callback_payload) self.rest_client.post(callback_url, json=callback_payload)
class DeploymentManager(object):
    """
    Coordinates package deployment and application lifecycle operations.

    Long running operations are handed to an ``AsyncDispatcher``; while one is
    running, its transient state is kept in an in-memory progress map guarded
    by a re-entrant lock, and state changes are reported to optional HTTP
    callback endpoints taken from the configuration.
    """

    def __init__(self, repository, package_registrar, application_registrar, environment, config):
        """
        :param repository: source of package archives (``get_package``, ``get_package_list``)
        :param package_registrar: persistent store for deployed packages and their deploy status
        :param application_registrar: persistent store for application records
        :param environment: environment description; its ``'namespace'`` entry is passed to the application creator
        :param config: configuration mapping; ``deployer_thread_limit`` and the callback endpoints are read from it
        """
        self._repository = repository
        self._package_registrar = package_registrar
        self._application_registrar = application_registrar
        self._environment = environment
        self._config = config
        self._application_creator = application_creator.ApplicationCreator(
            config, environment, environment['namespace'])
        self._package_parser = PackageParser()
        # name -> transient state of the background operation currently running on it
        self._package_progress = {}
        # re-entrant because the progress helpers are called while the lock is already held
        self._lock = threading.RLock()
        # load number of threads from config file:
        number_of_threads = self._config["deployer_thread_limit"]
        assert isinstance(number_of_threads, (int))
        assert number_of_threads > 0
        self.dispatcher = AsyncDispatcher(num_threads=number_of_threads)
        # the requests module is used as the HTTP client for callbacks
        self.rest_client = requests

    def get_environment(self):
        """
        :return: the environment object supplied at construction time
        """
        return self._environment

    def list_packages(self):
        """
        :return: the packages known to the package registrar
        """
        logging.info('list_deployed')
        return self._package_registrar.list_packages()

    def _assert_package_status(self, package, required_status):
        """
        Raises unless the package is currently in the required state.
        :param package: the name of the package to check
        :param required_status: the state the package must be in
        :raises NotFound: if the package is not deployed
        :raises ConflictingState: if the package is in any other unexpected state
        """
        status = self.get_package_info(package)['status']
        if status == required_status:
            return
        if status == PackageDeploymentState.NOTDEPLOYED:
            raise NotFound(json.dumps({'status': status}))
        raise ConflictingState(json.dumps({'status': status}))

    def list_repository(self, recency):
        """
        :param recency: passed through to the repository's ``get_package_list``
        :return: the packages available in the repository
        """
        logging.info("list_available: %s", recency)
        return self._repository.get_package_list(recency)

    def get_package_info(self, package):
        """
        Builds a summary of the package from the progress map and the registrar.
        :param package: the full package name, ``<name>-<version>``
        :return: dict with ``name``, ``version``, ``status``, ``defaults`` and ``information``
        """
        information = None
        progress_state = self._get_package_progress(package)
        if progress_state is not None:
            # an operation is running: the transient state wins over anything stored
            properties = None
            status = progress_state
            name = package.rpartition('-')[0]
            version = package.rpartition('-')[2]
        else:
            # package deploy is not in progress:
            # get last package status from database
            deploy_status = self._package_registrar.get_package_deploy_status(package)
            if deploy_status:
                status = deploy_status["state"]
                information = deploy_status["information"]
            # check if package data exists in database:
            if self._package_registrar.package_exists(package):
                metadata = self._package_registrar.get_package_metadata(package)
                properties = self._package_parser.properties_from_metadata(
                    metadata['metadata'])
                # stored package data overrides whatever state was last recorded
                status = PackageDeploymentState.DEPLOYED
                name = metadata['name']
                version = metadata['version']
            else:
                if not deploy_status:
                    status = PackageDeploymentState.NOTDEPLOYED
                properties = None
                name = package.rpartition('-')[0]
                version = package.rpartition('-')[2]
        return {
            "name": name,
            "version": version,
            "status": status,
            "defaults": properties,
            "information": information
        }

    def _run_asynch_package_task(self, package_name, initial_state, working_state, task):
        """
        Manages locks and state reporting for async background operations on packages
        :param package_name: The name of the package to operate on
        :param initial_state: The state to check before beginning work on the package
        :param working_state: The state to set while the package operation is being carried out.
        :param task: The actual work to be carried out
        """
        with self._lock:
            # check that package is in the right state before starting operation:
            self._assert_package_status(package_name, initial_state)
            # set the operation state before starting:
            self._set_package_progress(package_name, working_state)

        # this will be run in the background while taking care to release all locks and intermediate states:
        def do_work_and_report_progress():
            try:
                # report beginning of work to external APIs:
                self._state_change_event_package(package_name)
                # do the actual work:
                task()
            finally:
                # release the lock on the package:
                self._clear_package_progress(package_name)
                # report completion to external APIs
                self._state_change_event_package(package_name)

        # run everything on a background thread:
        self.dispatcher.run_as_asynch(task=do_work_and_report_progress)

    def deploy_package(self, package):
        """
        Downloads, validates and stores a package in the background.
        :param package: the full package name, without the ``.tar.gz`` suffix
        :raises NotFound, ConflictingState: via the state check, if the package is not in the NOTDEPLOYED state
        """
        # this function will be executed in the background:
        def _do_deploy():
            # if this value is not changed, then it is assumed that the operation never completed
            deploy_status = {
                "state": PackageDeploymentState.NOTDEPLOYED,
                "information": "Error deploying " + package + ", operation did not complete"
            }
            # stays None until the download succeeds, so cleanup knows whether a file exists
            package_data_path = None
            try:
                package_file = package + '.tar.gz'
                logging.info("deploy: %s", package)
                # download package:
                package_data_path = self._repository.get_package(package_file)
                # put package in database:
                metadata = self._package_parser.get_package_metadata(package_data_path)
                self._application_creator.validate_package(package, metadata)
                self._package_registrar.set_package(package, package_data_path)
                # set the operation status as complete
                deploy_status = {
                    "state": PackageDeploymentState.DEPLOYED,
                    "information": "Deployed " + package + " at " + self.utc_string()
                }
                logging.info("deployed: %s", package)
            except Exception as ex:
                logging.error(str(ex))
                error_message = "Error deploying " + package + " " + str(
                    type(ex).__name__) + ", details: " + json.dumps(str(ex))
                deploy_status = {
                    "state": PackageDeploymentState.NOTDEPLOYED,
                    "information": error_message
                }
                raise
            finally:
                try:
                    # report final state of operation to database:
                    self._package_registrar.set_package_deploy_status(package, deploy_status)
                finally:
                    # only remove the download if it was actually fetched; otherwise the
                    # cleanup itself would fail and hide the original error
                    if package_data_path is not None:
                        os.remove(package_data_path)

        # schedule work to be done in the background:
        self._run_asynch_package_task(
            package_name=package,
            initial_state=PackageDeploymentState.NOTDEPLOYED,
            working_state=PackageDeploymentState.DEPLOYING,
            task=_do_deploy)

    def utc_string(self):
        """
        :return: the current UTC time as an ISO 8601 string
        """
        return datetime.datetime.utcnow().isoformat()

    def undeploy_package(self, package):
        """
        Removes a deployed package in the background.
        :param package: the full package name
        :raises NotFound, ConflictingState: via the state check, if the package is not in the DEPLOYED state
        """
        # this function will be executed in the background:
        def do_undeploy():
            deploy_status = None
            try:
                logging.info("undeploy: %s", package)
                self._package_registrar.delete_package(package)
                logging.info("undeployed: %s", package)
            except Exception as ex:
                # log error to screen:
                logging.error(str(ex))
                # prepare human readable message
                error_message = "Error undeploying " + package + " " + str(
                    type(ex).__name__) + ", details: " + json.dumps(str(ex))
                # set the status:
                deploy_status = {
                    "state": PackageDeploymentState.DEPLOYED,
                    "information": error_message
                }
                raise
            finally:
                if deploy_status is not None:
                    # persist any errors in the database, but still throw them:
                    self._package_registrar.set_package_deploy_status(package, deploy_status)

        # schedule work to be done in the background:
        self._run_asynch_package_task(
            package_name=package,
            initial_state=PackageDeploymentState.DEPLOYED,
            working_state=PackageDeploymentState.UNDEPLOYING,
            task=do_undeploy)

    def _set_package_progress(self, package_name, state):
        """
        Marks the progress of background operations being run on the app.
        :param package_name: the name of the package to be modified
        :param state: the state of the background operation
        """
        # currently we are using multiple threads, so this lock is added for thread safety
        with self._lock:
            self._package_progress[package_name] = state

    def _get_package_progress(self, package_name):
        """
        :param package_name: The name of the package for which to query progress
        :return: the state of the package, or None if no operation is in progress
        """
        with self._lock:
            return self._package_progress.get(package_name)

    def _is_package_in_progress(self, package_name):
        """
        checks if the current package has an operation in progress
        :param package_name: the name of the package to check
        :return: true if the package is currently being operated on
        """
        with self._lock:
            return package_name in self._package_progress

    def _clear_package_progress(self, package):
        """
        Forgets any transient state recorded for the name; a no-op if none is recorded.
        :param package: the package or application name to clear
        """
        with self._lock:
            self._package_progress.pop(package, None)

    # The _mark_* helpers record application states in the same progress map
    # that packages use, keyed by application name.
    def _mark_destroying(self, package):
        self._set_package_progress(package, ApplicationState.DESTROYING)

    def _mark_creating(self, package):
        self._set_package_progress(package, ApplicationState.CREATING)

    def _mark_starting(self, package):
        self._set_package_progress(package, ApplicationState.STARTING)

    def _mark_stopping(self, package):
        self._set_package_progress(package, ApplicationState.STOPPING)

    def list_package_applications(self, package):
        """
        :param package: the package name to filter by
        :return: the applications the registrar lists for that package
        """
        logging.info('list_package_applications')
        return self._application_registrar.list_applications_for_package(package)

    def list_applications(self):
        """
        :return: all applications known to the application registrar
        """
        logging.info('list_applications')
        return self._application_registrar.list_applications()

    def _assert_application_status(self, application, required_status):
        """
        Raises unless the application is currently in an acceptable state.
        :param application: the name of the application to check
        :param required_status: a single state, or a list of acceptable states
        :raises NotFound: if the application has not been created
        :raises ConflictingState: if the application is in any other unexpected state
        """
        status = self.get_application_info(application)['status']
        if isinstance(required_status, list):
            acceptable = status in required_status
        else:
            acceptable = status == required_status
        if acceptable:
            return
        if status == ApplicationState.NOTCREATED:
            raise NotFound(json.dumps({'status': status}))
        raise ConflictingState(json.dumps({'status': status}))

    def _assert_application_exists(self, application):
        """
        :param application: the name of the application to check
        :raises NotFound: if the application has not been created
        """
        status = self.get_application_info(application)['status']
        if status == ApplicationState.NOTCREATED:
            raise NotFound(json.dumps({'status': status}))

    def start_application(self, application):
        """
        Starts a created application in the background.
        :param application: the name of the application to start
        :raises NotFound, ConflictingState: if the application is not in the CREATED state
        """
        logging.info('start_application')
        with self._lock:
            self._assert_application_status(application, ApplicationState.CREATED)
            self._mark_starting(application)

        def do_work():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(application)
                    self._application_creator.start_application(application, create_data)
                    self._application_registrar.set_application_status(
                        application, ApplicationState.STARTED)
                except Exception as ex:
                    # on failure the application is recorded as still CREATED
                    self._handle_application_error(application, ex, ApplicationState.CREATED, "starting")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work)

    def stop_application(self, application):
        """
        Stops a started application in the background.
        :param application: the name of the application to stop
        :raises NotFound, ConflictingState: if the application is not in the STARTED state
        """
        logging.info('stop_application')
        with self._lock:
            self._assert_application_status(application, ApplicationState.STARTED)
            self._mark_stopping(application)

        def do_work():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(application)
                    self._application_creator.stop_application(application, create_data)
                    self._application_registrar.set_application_status(
                        application, ApplicationState.CREATED)
                except Exception as ex:
                    # on failure the application is recorded as still STARTED
                    self._handle_application_error(application, ex, ApplicationState.STARTED, "stopping")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work)

    def get_application_info(self, application):
        """
        :param application: the name of the application to look up
        :return: the registrar's record for the application (or a NOTCREATED
                 placeholder), with ``status`` replaced by the transient state
                 when a background operation is in progress
        """
        logging.info('get_application_info')
        if not self._application_registrar.application_has_record(application):
            record = {
                'status': ApplicationState.NOTCREATED,
                'information': None
            }
        else:
            record = self._application_registrar.get_application(application)
        progress_state = self._get_package_progress(application)
        if progress_state is not None:
            record['status'] = progress_state
        return record

    def get_application_detail(self, application):
        """
        :param application: the name of the application to describe
        :return: the runtime details reported by the application creator, plus ``status`` and ``name``
        :raises NotFound: if the application has not been created
        """
        logging.info('get_application_detail')
        self._assert_application_exists(application)
        create_data = self._application_registrar.get_create_data(application)
        record = self._application_creator.get_application_runtime_details(
            application, create_data)
        record['status'] = self.get_application_info(application)['status']
        record['name'] = application
        return record

    def create_application(self, package, application, overrides):
        """
        Creates an application from a deployed package in the background.
        :param package: the name of the deployed package to create the application from
        :param application: the name of the application to create
        :param overrides: property overrides passed to the registrar and the application creator
        :raises NotFound, ConflictingState: if the application already exists or the package is not deployed
        """
        logging.info('create_application')
        with self._lock:
            self._assert_application_status(application, ApplicationState.NOTCREATED)
            self._assert_package_status(package, PackageDeploymentState.DEPLOYED)
            defaults = self.get_package_info(package)['defaults']
            package_data_path = self._package_registrar.get_package_data(package)
            self._application_registrar.create_application(
                package, application, overrides, defaults)
            self._mark_creating(application)

        def do_work():
            try:
                try:
                    self._state_change_event_application(application)
                    try:
                        package_metadata = self._package_registrar.get_package_metadata(
                            package)['metadata']
                        create_data = self._application_creator.create_application(
                            package_data_path, package_metadata, application, overrides)
                        self._application_registrar.set_create_data(application, create_data)
                        self._application_registrar.set_application_status(
                            application, ApplicationState.CREATED)
                    except Exception as ex:
                        self._handle_application_error(
                            application, ex, ApplicationState.NOTCREATED, "creating")
                        # format_exc() formats the exception currently being handled;
                        # its first parameter is a frame limit, not an exception
                        logging.error(traceback.format_exc())
                        raise
                finally:
                    # clear inner locks:
                    self._clear_package_progress(application)
                    self._state_change_event_application(application)
            finally:
                # always discard the local copy of the package, even when the
                # final state-change callback above fails
                if package_data_path is not None:
                    os.remove(package_data_path)

        self.dispatcher.run_as_asynch(task=do_work)

    def _handle_application_error(self, application, ex, app_status, operation):
        """
        Use to handle application exceptions which should be relayed back to the user
        Sets the application state to an error
        :param application: The app for which to set the error
        :param ex: The error
        :param app_status: The status the app should be at following the error.
        :param operation: Verb describing the failed operation, used in the stored message.
        """
        # log error to screen:
        logging.error(str(ex))
        # prepare human readable message
        error_message = "Error %s " % operation + application + " " + str(
            type(ex).__name__) + ", details: " + json.dumps(str(ex))
        # set the status:
        self._application_registrar.set_application_status(
            application, app_status, error_message)

    def delete_application(self, application):
        """
        Destroys a created or started application in the background.
        :param application: the name of the application to delete
        :raises NotFound, ConflictingState: if the application is neither CREATED nor STARTED
        """
        logging.info('delete_application')
        with self._lock:
            self._assert_application_status(
                application, [ApplicationState.CREATED, ApplicationState.STARTED])
            self._mark_destroying(application)

        def do_work():
            try:
                self._state_change_event_application(application)
                try:
                    create_data = self._application_registrar.get_create_data(application)
                    self._application_creator.destroy_application(application, create_data)
                    self._application_registrar.delete_application(application)
                except Exception as ex:
                    self._handle_application_error(application, ex, ApplicationState.STARTED, "deleting")
                    raise
            finally:
                self._clear_package_progress(application)
                self._state_change_event_application(application)

        self.dispatcher.run_as_asynch(task=do_work)

    def _state_change_event_application(self, name):
        """
        Reports the application's current state to the ``application_callback`` endpoint.
        :param name: the name of the application
        """
        endpoint_type = "application_callback"
        info = self.get_application_info(name)
        self._state_change_event(name, endpoint_type, info['status'], info['information'])

    def _state_change_event_package(self, name):
        """
        Reports the package's current state to the ``package_callback`` endpoint.
        :param name: the name of the package
        """
        endpoint_type = "package_callback"
        info = self.get_package_info(name)
        self._state_change_event(name, endpoint_type, info['status'], info['information'])

    def _state_change_event(self, name, endpoint_type, state, information):
        """
        Posts a state-change notification, if a callback URL is configured.
        :param name: the id of the package or application that changed
        :param endpoint_type: the config key holding the callback URL
        :param state: the state to report
        :param information: optional extra detail; only included when truthy
        """
        callback_url = self._config[endpoint_type]
        if callback_url:
            logging.debug("callback: %s %s %s", endpoint_type, name, state)
            callback_payload = {
                "data": [{
                    "id": name,
                    "state": state,
                    "timestamp": milli_time()
                }],
                "timestamp": milli_time()
            }
            # add additional optional information
            if information:
                callback_payload["data"][0]["information"] = information
            logging.debug(callback_payload)
            self.rest_client.post(callback_url, json=callback_payload)
class HbasePackageRegistrar(object):
    """
    Stores deployed packages: metadata and deploy status go into an HBase
    table, the package archive itself goes into HDFS.
    """

    COLUMN_DEPLOY_STATUS = "cf:deploy_status"

    def __init__(self, hbase_host, hdfs_host, hdfs_user, hdfs_port, package_local_dir_path):
        """
        :param hbase_host: HBase host to connect to; table creation is skipped when None
        :param hdfs_host: HDFS host; HDFS directory creation is skipped when None
        :param hdfs_user: user passed to the HDFS client
        :param hdfs_port: port passed to the HDFS client
        :param package_local_dir_path: local directory that packages are downloaded into
        """
        self._hbase_host = hbase_host
        self._hdfs_user = hdfs_user
        self._hdfs_host = hdfs_host
        self._hdfs_port = hdfs_port
        self._hdfs_client = HDFS(hdfs_host, hdfs_port, hdfs_user)
        self._parser = PackageParser()
        self._table_name = 'platform_packages'
        self._dm_root_dir_path = "/pnda/system/deployment-manager"
        self._package_hdfs_dir_path = "%s/packages" % self._dm_root_dir_path
        self._package_local_dir_path = package_local_dir_path
        try:
            if hdfs_host is not None:
                # NOTE(review): if the first make_dir raises AlreadyExists the second
                # is never attempted - confirm make_dir semantics for existing dirs
                self._hdfs_client.make_dir(self._dm_root_dir_path, permission=755)
                self._hdfs_client.make_dir(self._package_hdfs_dir_path, permission=600)
                logging.debug("packages HDFS folder created")
            else:
                logging.debug(
                    "not creating packages HDFS folder as it is not required")
        except AlreadyExists:
            logging.debug(
                "not creating packages HDFS folder as it already exists")
        if self._hbase_host is not None:
            connection = happybase.Connection(self._hbase_host)
            try:
                connection.create_table(self._table_name, {'cf': dict()})
                logging.debug("packages table created")
            except AlreadyExists:
                logging.debug("packages table exists")
            finally:
                connection.close()

    @staticmethod
    def _decode_row(row):
        """
        Normalises a row read from the table so it can be indexed with text column names.
        :param row: mapping of column name to value; keys/values may be bytes or text
        :return: a new dict with every bytes key and value decoded as UTF-8; empty dict for an empty row
        """
        def to_text(item):
            return item.decode("utf-8") if isinstance(item, bytes) else item

        if not row:
            return {}
        return {to_text(key): to_text(value) for key, value in row.items()}

    def set_package(self, package_name, package_data_path, user):
        """
        Parses the package archive and stores it in HDFS and its record in HBase.
        :param package_name: the name of the package (used for logging only)
        :param package_data_path: local path of the package archive
        :param user: stored in the metadata under the ``user`` key
        """
        logging.debug("Storing %s", package_name)
        metadata = self._parser.get_package_metadata(package_data_path)
        metadata['user'] = user
        key, data = self.generate_record(metadata)
        self._write_to_hdfs(package_data_path, data['cf:package_data'])
        self._write_to_db(key, data)

    def set_package_deploy_status(self, package_name, deploy_status):
        """
        Stores information about the progress of the deploy process of the package
        :param package_name: the row key to store the state under
        :param deploy_status: the state to store
        """
        logging.debug("Storing state for %s: %s", package_name, str(deploy_status))
        state_as_string = json.dumps(deploy_status)
        self._write_to_db(package_name, {self.COLUMN_DEPLOY_STATUS: state_as_string})

    def delete_package(self, package_name):
        """
        Removes the package archive from HDFS and its row from HBase.
        :param package_name: the name of the package to delete
        :raises KeyError: if no package data path is recorded for the package
        """
        logging.debug("Deleting %s", package_name)
        record = self._decode_row(
            self._read_from_db(package_name, ['cf:package_data']))
        package_data_hdfs_path = record['cf:package_data']
        self._hdfs_client.remove(package_data_hdfs_path)
        connection = happybase.Connection(self._hbase_host)
        try:
            table = connection.table(self._table_name)
            table.delete(package_name)
        finally:
            connection.close()

    def get_package_data(self, package_name):
        """
        Downloads the package archive from HDFS to the local package directory.
        :param package_name: the name of the package to fetch
        :return: the local path of the downloaded archive, or None if the package has no record
        """
        logging.debug("Reading %s", package_name)
        record = self._decode_row(
            self._read_from_db(package_name, ['cf:package_data']))
        if not record:
            return None
        local_package_path = "%s/%s" % (self._package_local_dir_path, package_name)
        self._read_from_hdfs(record['cf:package_data'], local_package_path)
        return local_package_path

    def get_package_metadata(self, package_name):
        """
        :param package_name: the name of the package to look up
        :return: dict with the parsed ``metadata``, the ``name`` and the ``version``,
                 or None if the package has no record
        """
        logging.debug("Reading %s", package_name)
        data = self._read_from_db(package_name,
                                  ['cf:metadata', 'cf:name', 'cf:version'])
        if not data:
            return None
        package_data = self._decode_row(data)
        return {
            "metadata": json.loads(package_data["cf:metadata"]),
            "name": package_data["cf:name"],
            "version": package_data["cf:version"]
        }

    def package_exists(self, package_name):
        """
        :param package_name: the name of the package to check
        :return: True if a name column is stored for the package
        """
        logging.debug("Checking %s", package_name)
        package_data = self._read_from_db(package_name, ['cf:name'])
        return len(package_data) > 0

    def get_package_deploy_status(self, package_name):
        """
        :param package_name: the package name to check status for
        :return: The last reported progress of the deploy process for the current package
        """
        logging.debug("Checking %s", package_name)
        package_data = self._decode_row(
            self._read_from_db(package_name, columns=[self.COLUMN_DEPLOY_STATUS]))
        if not package_data:
            return None
        # all status is stored as json, so parse it and return it
        deploy_status_as_string = package_data[self.COLUMN_DEPLOY_STATUS]
        return json.loads(deploy_status_as_string)

    def list_packages(self):
        """
        :return: the row keys of all rows that have a name column
        :raises FailedConnection: if connecting to or scanning the table fails
        """
        logging.debug("List all packages")
        connection = None
        try:
            connection = happybase.Connection(self._hbase_host)
            table = connection.table(self._table_name)
            result = [key for key, _ in table.scan(columns=['cf:name'])]
        except Exception as exc:
            logging.debug(str(exc))
            raise FailedConnection('Unable to connect to the HBase master')
        finally:
            if connection:
                connection.close()
        return result

    def generate_record(self, metadata):
        """
        Builds the HBase row for a package from its metadata.
        :param metadata: package metadata; ``package_name`` must be ``<name>-<version>``
        :return: tuple of (row key, dict of column name to value)
        """
        return metadata["package_name"], {
            'cf:name': '-'.join(metadata["package_name"].split("-")[:-1]),
            'cf:version': metadata["package_name"].split("-")[-1],
            'cf:metadata': json.dumps(metadata),
            'cf:package_data': "%s/%s" % (self._package_hdfs_dir_path, metadata["package_name"])
        }

    def _read_from_db(self, key, columns):
        """
        :param key: the row key to read
        :param columns: the column names to fetch
        :return: the row exactly as returned by the table
        """
        connection = happybase.Connection(self._hbase_host)
        try:
            table = connection.table(self._table_name)
            data = table.row(key, columns=columns)
        finally:
            connection.close()
        return data

    def _read_from_hdfs(self, source_hdfs_path, dest_local_path):
        """
        Streams a file from HDFS to the local disk.
        """
        self._hdfs_client.stream_file_to_disk(source_hdfs_path, dest_local_path)

    def _write_to_db(self, key, data):
        """
        :param key: the row key to write
        :param data: dict of column name to value
        """
        connection = happybase.Connection(self._hbase_host)
        try:
            table = connection.table(self._table_name)
            table.put(key, data)
        finally:
            connection.close()

    def _write_to_hdfs(self, source_local_path, dest_hdfs_path):
        """
        Copies a local file to HDFS in 10 MiB chunks; the first chunk creates
        the file and the rest are appended. An empty source writes nothing.
        """
        with open(source_local_path, 'rb') as source_file:
            first = True
            chunk_size = 10 * 1024 * 1024
            data_chunk = source_file.read(chunk_size)
            while data_chunk:
                if first:
                    self._hdfs_client.create_file(data_chunk, dest_hdfs_path, permission=600)
                    first = False
                else:
                    self._hdfs_client.append_file(data_chunk, dest_hdfs_path)
                data_chunk = source_file.read(chunk_size)
class HbasePackageRegistrar(object):
    """
    Keeps package records in an HBase table and package archives in HDFS.
    """

    COLUMN_DEPLOY_STATUS = "cf:deploy_status"

    def __init__(self, hbase_host, hdfs_host, hdfs_user, hdfs_port, package_local_dir_path):
        """
        :param hbase_host: HBase host; when None the table is not created
        :param hdfs_host: host passed to the HDFS client
        :param hdfs_user: user passed to the HDFS client
        :param hdfs_port: port passed to the HDFS client
        :param package_local_dir_path: local directory packages are downloaded into
        """
        self._hbase_host = hbase_host
        self._hdfs_user = hdfs_user
        self._hdfs_host = hdfs_host
        self._hdfs_port = hdfs_port
        self._hdfs_client = HDFS(hdfs_host, hdfs_port, hdfs_user)
        self._parser = PackageParser()
        self._table_name = 'platform_packages'
        self._package_hdfs_dir_path = "/user/pnda/application_packages"
        self._package_local_dir_path = package_local_dir_path
        if self._hbase_host is None:
            return
        # make sure the table exists; an existing table is not an error
        hbase = happybase.Connection(self._hbase_host)
        try:
            hbase.create_table(self._table_name, {'cf': {}})
            logging.debug("packages table created")
        except AlreadyExists:
            logging.debug("packages table exists")
        finally:
            hbase.close()

    def set_package(self, package_name, package_data_path):
        """
        Parses the archive, uploads it to HDFS and then writes its row to HBase.
        :param package_name: the package name (used for logging only)
        :param package_data_path: local path of the package archive
        """
        logging.debug("Storing %s", package_name)
        row_key, columns = self.generate_record(
            self._parser.get_package_metadata(package_data_path))
        self._write_to_hdfs(package_data_path, columns['cf:package_data'])
        self._write_to_db(row_key, columns)

    def set_package_deploy_status(self, package_name, deploy_status):
        """
        Stores information about the progress of the deploy process of the package
        :param package_name: the row key the state is stored under
        :param deploy_status: the state to store, serialised as JSON
        """
        logging.debug("Storing state for %s: %s", package_name, str(deploy_status))
        self._write_to_db(
            package_name,
            {self.COLUMN_DEPLOY_STATUS: json.dumps(deploy_status)})

    def delete_package(self, package_name):
        """
        Removes the archive from HDFS, then deletes the package's row.
        :param package_name: the name of the package to delete
        """
        logging.debug("Deleting %s", package_name)
        stored = self._read_from_db(package_name, ['cf:package_data'])
        self._hdfs_client.remove(stored['cf:package_data'])
        hbase = happybase.Connection(self._hbase_host)
        try:
            hbase.table(self._table_name).delete(package_name)
        finally:
            hbase.close()

    def get_package_data(self, package_name):
        """
        :param package_name: the name of the package to download
        :return: local path of the downloaded archive, or None when no row exists
        """
        logging.debug("Reading %s", package_name)
        stored = self._read_from_db(package_name, ['cf:package_data'])
        if not len(stored):
            return None
        destination = "%s/%s" % (self._package_local_dir_path, package_name)
        self._read_from_hdfs(stored['cf:package_data'], destination)
        return destination

    def get_package_metadata(self, package_name):
        """
        :param package_name: the name of the package to look up
        :return: dict with parsed ``metadata``, ``name`` and ``version``, or None when no row exists
        """
        logging.debug("Reading %s", package_name)
        wanted = ['cf:metadata', 'cf:name', 'cf:version']
        stored = self._read_from_db(package_name, wanted)
        if not len(stored):
            return None
        summary = {}
        summary["metadata"] = json.loads(stored["cf:metadata"])
        summary["name"] = stored["cf:name"]
        summary["version"] = stored["cf:version"]
        return summary

    def package_exists(self, package_name):
        """
        :param package_name: the name of the package to check
        :return: True when a name column is stored for the package
        """
        logging.debug("Checking %s", package_name)
        return len(self._read_from_db(package_name, ['cf:name'])) > 0

    def get_package_deploy_status(self, package_name):
        """
        :param package_name: the package name to check status for
        :return: The last reported progress of the deploy process for the current package
        """
        logging.debug("Checking %s", package_name)
        stored = self._read_from_db(package_name, columns=[self.COLUMN_DEPLOY_STATUS])
        if not len(stored):
            return None
        # the status column holds JSON text
        return json.loads(stored[self.COLUMN_DEPLOY_STATUS])

    def list_packages(self):
        """
        :return: the row keys of every row that has a name column
        """
        logging.debug("List all packages")
        hbase = happybase.Connection(self._hbase_host)
        try:
            rows = hbase.table(self._table_name).scan(columns=['cf:name'])
            return [row_key for row_key, _ in rows]
        finally:
            hbase.close()

    def generate_record(self, metadata):
        """
        :param metadata: package metadata containing ``package_name`` as ``<name>-<version>``
        :return: tuple of (row key, dict of column name to value)
        """
        full_name = metadata["package_name"]
        # everything before the last '-' is the name, everything after it the version
        base_name, _, version = full_name.rpartition("-")
        columns = {
            'cf:name': base_name,
            'cf:version': version,
            'cf:metadata': json.dumps(metadata),
            'cf:package_data': "%s/%s" % (self._package_hdfs_dir_path, full_name)
        }
        return full_name, columns

    def _read_from_db(self, key, columns):
        """
        :param key: the row key to read
        :param columns: the column names to fetch
        :return: the row as returned by the table
        """
        hbase = happybase.Connection(self._hbase_host)
        try:
            return hbase.table(self._table_name).row(key, columns=columns)
        finally:
            hbase.close()

    def _read_from_hdfs(self, source_hdfs_path, dest_local_path):
        """
        Streams a file from HDFS to the local disk.
        """
        self._hdfs_client.stream_file_to_disk(source_hdfs_path, dest_local_path)

    def _write_to_db(self, key, data):
        """
        :param key: the row key to write
        :param data: dict of column name to value
        """
        hbase = happybase.Connection(self._hbase_host)
        try:
            hbase.table(self._table_name).put(key, data)
        finally:
            hbase.close()

    def _write_to_hdfs(self, source_local_path, dest_hdfs_path):
        """
        Copies a local file to HDFS in 10 MiB pieces: the first piece creates
        the file, later pieces are appended. Nothing is written for an empty file.
        """
        piece_size = 10 * 1024 * 1024
        with open(source_local_path, 'rb') as local_file:
            pieces = iter(lambda: local_file.read(piece_size), b'')
            for position, piece in enumerate(pieces):
                if position == 0:
                    self._hdfs_client.create_file(piece, dest_hdfs_path)
                else:
                    self._hdfs_client.append_file(piece, dest_hdfs_path)