def groomHiveTables(model):
    """Default and sanity-check every entry of the Hive tables list.

    Does nothing when model[SRC] holds no Hive table. Otherwise a Hive relay
    must be present, NO_REMOVE defaults to False on each table, and
    inconsistent attribute combinations are reported through misc.ERROR.

    :param model: the model; model[SRC] is read and its table entries are
        updated in place.
    """
    source = model[SRC]
    if HIVE_TABLES not in source or len(source[HIVE_TABLES]) == 0:
        return
    if HIVE_RELAY not in source:
        misc.ERROR(
            'A hive_relay must be defined if at least one hive table is defined'
        )
    for table in source[HIVE_TABLES]:
        misc.setDefaultInMap(table, NO_REMOVE, False)
        # Misconfigurations are detected here rather than letting jdchive
        # fail, because the report given to the user is far better.
        hasInputFormat = INPUT_FORMAT in table
        if hasInputFormat != (OUTPUT_FORMAT in table):
            misc.ERROR(
                "HIVE table '{0}:{1}': Both 'input_format' and 'output_format' must be defined together!"
                .format(table[DATABASE], table[NAME]))
        if STORED_AS in table and hasInputFormat:
            misc.ERROR(
                "HIVE table '{0}:{1}': Both 'stored_as' and 'input/output_format' can't be defined together!"
                .format(table[DATABASE], table[NAME]))
        if DELIMITED in table and SERDE in table:
            misc.ERROR(
                "HIVE table '{0}:{1}': Both 'delimited' and 'serde' can't be defined together!"
                .format(table[DATABASE], table[NAME]))
        if LOCATION in table and not table[LOCATION].startswith("/"):
            misc.ERROR(
                "HIVE table '{0}:{1}': Location must be absolute!".format(
                    table[DATABASE], table[NAME]))
def groomYarnServices(model):
    """Default, validate and index the YARN services declared in model[SRC].

    For each service: LAUNCHING_DIR defaults to "~", TIMEOUT_SECS defaults to
    the relay's DEFAULT_TIMEOUT_SECS, and the launching dir must be absolute
    or start with "~". The service name is appended to the comma-separated
    ALL_SERVICES string in model[DATA][YARN], and also to SERVICES_TO_KILL
    when the service defines no KILLING_CMD.

    :param model: the model; model[SRC] and model[DATA][YARN] are updated
        in place.
    """
    source = model[SRC]
    if YARN_SERVICES not in source or len(source[YARN_SERVICES]) == 0:
        return
    if YARN_RELAY not in source:
        misc.ERROR(
            "A yarn_relay must be defined if at least one yarn_services is defined"
        )
    for service in source[YARN_SERVICES]:
        misc.setDefaultInMap(service, LAUNCHING_DIR, "~")
        misc.setDefaultInMap(service, TIMEOUT_SECS,
                             source[YARN_RELAY][DEFAULT_TIMEOUT_SECS])
        if LAUNCHING_DIR in service:
            launchingDir = service[LAUNCHING_DIR]
            if not (os.path.isabs(launchingDir)
                    or launchingDir.startswith("~")):
                misc.ERROR(
                    "yarn_services '{}': launching_dir must be an absolute path"
                    .format(service[NAME]))
        # Keys of the comma-separated name lists this service belongs to.
        listKeys = [ALL_SERVICES]
        if KILLING_CMD not in service:
            listKeys.append(SERVICES_TO_KILL)
        yarnData = model[DATA][YARN]
        for key in listKeys:
            if key in yarnData:
                yarnData[key] = yarnData[key] + "," + service[NAME]
            else:
                yarnData[key] = service[NAME]
def groomSupervisors(self):
    """Register every declared supervisor by name and by scope.

    Skipped entirely when SCOPE_SUPERVISOR is excluded. Each supervisor is
    stored under SUPERVISOR_BY_NAME (duplicates are reported), MANAGED
    defaults to True, groomOneSupervisord() is applied, and managed
    supervisors are appended to the per-scope SUPERVISORS_TO_MANAGE list
    (plus SUPERVISORS_TO_REMOVE unless NO_REMOVE is set).
    """
    context = self.context
    if context.toExclude(SCOPE_SUPERVISOR):
        return
    model = context.model
    if SUPERVISORS not in model[SRC]:
        return
    misc.ensureObjectInMaps(model[DATA][SUPERVISORS], [SUPERVISOR_BY_NAME], {})
    for supervisord in model[SRC][SUPERVISORS]:
        byName = model[DATA][SUPERVISORS][SUPERVISOR_BY_NAME]
        if supervisord[NAME] in byName:
            misc.ERROR("supervisor '{0}' is defined twice!".format(supervisord[NAME]))
        byName[supervisord[NAME]] = supervisord
        misc.setDefaultInMap(supervisord, MANAGED, True)
        self.groomOneSupervisord(model, supervisord)
        # ---------------------- Insert in scope
        if not context.checkScope(supervisord[SCOPE]):
            misc.ERROR("Supervisor {0}: scope attribute '{1}' does not match any host or host_group!".format(supervisord[NAME], supervisord[SCOPE]))
        elif supervisord[MANAGED]:
            byScope = model[DATA][SUPERVISORS][SCOPE_BY_NAME]
            scopeName = supervisord[SCOPE]
            misc.ensureObjectInMaps(byScope, [scopeName, SUPERVISORS_TO_MANAGE], [])
            byScope[scopeName][SUPERVISORS_TO_MANAGE].append(supervisord)
            if not supervisord[NO_REMOVE]:
                misc.ensureObjectInMaps(byScope, [scopeName, SUPERVISORS_TO_REMOVE], [])
                byScope[scopeName][SUPERVISORS_TO_REMOVE].append(supervisord)
def groomNodeToHdfsFilesOrTrees(f, model):
    """Groom a file/tree entry whose source is 'node://<host>/<path>'.

    DEST_NAME defaults to the basename of the source and _TARGET_ is the
    normalized join of DEST_FOLDER and DEST_NAME. On success the entry gets
    _SRC_ set to the path on the node, SCOPE replaced by the node name and
    NODE_TO_HDFS_FLAG set. Invalid forms are reported through misc.ERROR.

    :param f: the file or tree description, updated in place.
    :param model: the model; model[DATA][INVENTORY][HOST_BY_NAME] is read to
        validate the node name.
    """
    misc.setDefaultInMap(f, DEST_NAME, os.path.basename(f[FSRC]))
    f[_TARGET_] = os.path.normpath(os.path.join(f[DEST_FOLDER], f[DEST_NAME]))
    # Split '<node>/<path>' on the first slash; 'slash' is empty when the
    # source holds no path part at all.
    node, slash, remainder = f[FSRC][len("node://"):].partition("/")
    if not slash:
        misc.ERROR("Files: {0} is not a valid form as 'src' attribute".format(
            f[FSRC]))
    elif node not in model[DATA][INVENTORY][HOST_BY_NAME]:
        misc.ERROR(
            "Files: {0} is not a valid form for 'src' attribute: Node '{1}' does not exists"
            .format(f[FSRC], node))
    elif f[SCOPE] != HDFS:
        misc.ERROR(
            "Files: {0} is not a valid form for 'src' attribute: Copying from node is only valid for 'hdfs' scope"
            .format(f[FSRC]))
    else:
        # The path keeps its leading slash, so it is always absolute; the
        # former "require an absolute path" check could never trigger.
        f[_SRC_] = slash + remainder
        f[SCOPE] = node
        f[NODE_TO_HDFS_FLAG] = True  # Flag it as a special meaning
def check(model):
    """Validate hosts and host groups and collect the hosts to set up.

    FORCE_SETUP defaults to False on every host and host group. Each host
    must define SSH_USER, may not define both a private key file and a
    password, and a declared private key file must exist on disk. Every host
    listed in a group must be known in HOST_BY_NAME. The names of hosts
    flagged FORCE_SETUP (directly or through a group) are stored as a list in
    model[DATA][INVENTORY][HOSTS_TO_SETUP].

    :param model: the model; model[SRC] entries and model[DATA][INVENTORY]
        are updated in place.
    """
    # Builtin set instead of the deprecated sets.Set; only add() and
    # iteration are needed here.
    hostsToSetup = set()
    source = model[SRC]
    if HOSTS in source:
        for host in source[HOSTS]:
            misc.setDefaultInMap(host, FORCE_SETUP, False)
            if SSH_USER not in host:
                misc.ERROR("Hosts:'{0}': 'ssh_user' must be defined!".format(host[NAME]))
            if SSH_PRIVATE_FILE_FILE in host and SSH_PASSWORD in host:
                misc.ERROR("Hosts:'{0}': 'ssh_private_key_file' and 'ssh_password' can't be both defined!".format(host[NAME]))
            # NOTE: a host defining neither 'ssh_private_key_file' nor
            # 'ssh_password' is accepted.
            if SSH_PRIVATE_FILE_FILE in host and not os.path.exists(host[SSH_PRIVATE_FILE_FILE]):
                misc.ERROR("host:'{0}': ssh_private_key_file '{1}' does not exists!".format(host[NAME], host[SSH_PRIVATE_FILE_FILE]))
            if host[FORCE_SETUP]:
                hostsToSetup.add(host[NAME])
    if HOST_GROUPS in source:
        for group in source[HOST_GROUPS]:
            misc.setDefaultInMap(group, FORCE_SETUP, False)
            for hostName in group[HOSTS]:
                if hostName not in model[DATA][INVENTORY][HOST_BY_NAME]:
                    misc.ERROR("Group '{0}': Host '{1}' is not defined!".format(group[NAME], hostName))
                if group[FORCE_SETUP]:
                    hostsToSetup.add(hostName)
    model[DATA][INVENTORY][HOSTS_TO_SETUP] = list(hostsToSetup)
def groomMavenFiles(f, model):
    """Groom a file entry whose source is a 'mvn://' artifact reference.

    The source is split on '/' into repository, group id, artifact id,
    version and optional classifier and extension (the extension defaults to
    "jar"). The repository URL is chosen from the version: 'latest',
    anything containing 'SNAPSHOT', or a release, each falling back to the
    repository's generic URL. DEST_NAME defaults to
    '<artifact>-<version>[-<classifier>].<extension>' and _TARGET_ is the
    normalized join of DEST_FOLDER and DEST_NAME.

    :param f: the file description, updated in place.
    :param model: the model; model[DATA][MAVEN_REPO_BY_NAME] is read.
    """
    src = f[FSRC][len('mvn://'):].split("/")
    if len(src) < 4 or len(src) > 6:
        # Report the file's own source attribute (FSRC), as everywhere else.
        misc.ERROR(
            "'{0}' is not a valid maven path. Must be in the form mvn://maven_repo/group_id/artifact_id/version[classifier[/extension]]"
            .format(f[FSRC]))
    if src[0] not in model[DATA][MAVEN_REPO_BY_NAME]:
        misc.ERROR("'{0}' is not a defined maven repository".format(src[0]))
    else:
        repository = model[DATA][MAVEN_REPO_BY_NAME][src[0]]
    f[_REPO_] = src[0]
    f[_GROUP_ID_] = src[1]
    f[_ARTIFACT_ID_] = src[2]
    f[_VERSION_] = src[3]
    # An empty classifier segment ('.../version//ext') means "no classifier".
    if len(src) >= 5 and len(src[4]) > 0:
        f[_CLASSIFIER_] = src[4]
    f[_EXTENSION_] = src[5] if len(src) >= 6 else "jar"
    # Fixup _repoUrl_ based on version
    if f[_VERSION_] == "latest":
        urlKey, urlLabel = LASTEST_URL, "latest_url"
    elif "SNAPSHOT" in f[_VERSION_]:
        urlKey, urlLabel = SNAPSHOTS_URL, "snapshots_url"
    else:
        urlKey, urlLabel = RELEASES_URL, "releases_url"
    if urlKey in repository:
        f[_REPO_URL_] = repository[urlKey]
    elif URL in repository:
        f[_REPO_URL_] = repository[URL]
    else:
        misc.ERROR(
            "Maven artifact '{0}': No '{1}' nor 'url' defined in repository '{2}'"
            .format(src, urlLabel, repository[NAME]))
    misc.setDefaultInMap(
        f, DEST_NAME,
        "{0}-{1}{2}.{3}".format(f[_ARTIFACT_ID_], f[_VERSION_],
                                ("-" + f[_CLASSIFIER_])
                                if _CLASSIFIER_ in f else "", f[_EXTENSION_]))
    f[_TARGET_] = os.path.normpath(os.path.join(f[DEST_FOLDER], f[DEST_NAME]))
def groomMavenRepositories(context):
    """Index the declared maven repositories by name and apply defaults.

    model[DATA][MAVEN_REPO_BY_NAME] is always created. Each repository is
    stored under its "name", with VALIDATE_CERTS defaulting to True and
    TIMEOUT defaulting to 10.

    :param context: object whose 'model' attribute is the model to groom.
    """
    model = context.model
    misc.ensureObjectInMaps(model[DATA], [MAVEN_REPO_BY_NAME], {})
    if MAVEN_REPOSITORIES not in model[SRC]:
        return
    for repository in model[SRC][MAVEN_REPOSITORIES]:
        model[DATA][MAVEN_REPO_BY_NAME][repository["name"]] = repository
        for key, default in ((VALIDATE_CERTS, True), (TIMEOUT, 10)):
            misc.setDefaultInMap(repository, key, default)
def grabHBaseRangerPoliciesFromTables(model):
    """Move Ranger policies embedded in HBase tables to the policy list.

    For every HBase table carrying a RANGER_POLICY, the policy is bound to
    '<namespace>:<name>', given a default name, inherits the table's
    NO_REMOVE flag and is appended to model[SRC][HBASE_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if HBASE_TABLES not in source:
        return
    for table in source[HBASE_TABLES]:
        if RANGER_POLICY not in table:
            continue
        policy = table[RANGER_POLICY]
        policy[TABLES] = [table[NAMESPACE] + ':' + table[NAME]]
        defaultName = DEFAULT_HBASE_TABLE_POLICY_NAME.format(table[NAMESPACE], table[NAME])
        misc.setDefaultInMap(policy, NAME, defaultName)
        # Relies on NO_REMOVE having already been set on the table.
        policy[NO_REMOVE] = table[NO_REMOVE]
        misc.ensureObjectInMaps(source, [HBASE_RANGER_POLICIES], [])
        source[HBASE_RANGER_POLICIES].append(policy)
def grabKafkaRangerPoliciesFromTopics(model):
    """Move Ranger policies embedded in Kafka topics to the policy list.

    For every topic carrying a RANGER_POLICY, the policy is bound to the
    topic name, given a default name, inherits the topic's NO_REMOVE flag
    and is appended to model[SRC][KAFKA_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if KAFKA_TOPICS not in source:
        return
    for topic in source[KAFKA_TOPICS]:
        if RANGER_POLICY not in topic:
            continue
        policy = topic[RANGER_POLICY]
        policy[TOPICS] = [topic[NAME]]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(topic[NAME]))
        # Relies on NO_REMOVE having already been set on the topic.
        policy[NO_REMOVE] = topic[NO_REMOVE]
        misc.ensureObjectInMaps(source, [KAFKA_RANGER_POLICIES], [])
        source[KAFKA_RANGER_POLICIES].append(policy)
def grabStormRangerPoliciesFromTopologies(model):
    """Move Ranger policies embedded in Storm topologies to the policy list.

    For every topology carrying a RANGER_POLICY, the policy is bound to the
    topology name, given a default name, inherits the topology's NO_REMOVE
    flag and is appended to model[SRC][STORM_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if STORM_TOPOLOGIES not in source:
        return
    for topology in source[STORM_TOPOLOGIES]:
        if RANGER_POLICY not in topology:
            continue
        policy = topology[RANGER_POLICY]
        policy[TOPOLOGIES] = [topology[NAME]]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(topology[NAME]))
        # Relies on NO_REMOVE having already been set on the topology.
        policy[NO_REMOVE] = topology[NO_REMOVE]
        misc.ensureObjectInMaps(source, [STORM_RANGER_POLICIES], [])
        source[STORM_RANGER_POLICIES].append(policy)
def grabHBaseRangerPoliciesFromNamespaces(model):
    """Move Ranger policies embedded in HBase namespaces to the policy list.

    For every namespace carrying a RANGER_POLICY, the policy is bound to
    '<namespace>:*', given a default name, inherits the namespace's
    NO_REMOVE flag and is appended to model[SRC][HBASE_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if HBASE_NAMESPACES not in source:
        return
    for namespace in source[HBASE_NAMESPACES]:
        if RANGER_POLICY not in namespace:
            continue
        policy = namespace[RANGER_POLICY]
        # The policy covers every table of the namespace.
        policy[TABLES] = [namespace[NAME] + ":*"]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(namespace[NAME]))
        policy[NO_REMOVE] = namespace[NO_REMOVE]
        misc.ensureObjectInMaps(source, [HBASE_RANGER_POLICIES], [])
        source[HBASE_RANGER_POLICIES].append(policy)
def grabHdfsRangerPoliciesFromTrees(model):
    """Move Ranger policies embedded in trees to the HDFS policy list.

    A tree carrying a RANGER_POLICY must have the HDFS scope (reported
    through misc.ERROR otherwise). The policy is bound to the tree's
    DEST_FOLDER, inherits its NO_REMOVE flag, gets a default name and is
    appended to model[SRC][HDFS_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if TREES not in source:
        return
    for tree in source[TREES]:
        if RANGER_POLICY not in tree:
            continue
        if tree[SCOPE] != HDFS:
            misc.ERROR("Can't setup Apache Ranger policy on tree '{0}' as scope is not hdfs".format(tree[DEST_FOLDER]))
        policy = tree[RANGER_POLICY]
        policy[PATHS] = [tree[DEST_FOLDER]]
        policy[NO_REMOVE] = tree[NO_REMOVE]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(tree[DEST_FOLDER]))
        misc.ensureObjectInMaps(source, [HDFS_RANGER_POLICIES], [])
        source[HDFS_RANGER_POLICIES].append(policy)
def grabHdfsRangerPoliciesFromFolders(model):
    """Move Ranger policies embedded in folders to the HDFS policy list.

    A folder carrying a RANGER_POLICY must have the HDFS scope (reported
    through misc.ERROR otherwise). The policy is bound to the folder's PATH,
    inherits its NO_REMOVE flag, gets a default name and is appended to
    model[SRC][HDFS_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if FOLDERS not in source:
        return
    for folder in source[FOLDERS]:
        if RANGER_POLICY not in folder:
            continue
        if folder[SCOPE] != HDFS:
            misc.ERROR("Can't setup Apache Ranger policy on folder '{0}' as scope is not hdfs".format(folder[PATH]))
        policy = folder[RANGER_POLICY]
        policy[PATHS] = [folder[PATH]]
        policy[NO_REMOVE] = folder[NO_REMOVE]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(folder[PATH]))
        misc.ensureObjectInMaps(source, [HDFS_RANGER_POLICIES], [])
        source[HDFS_RANGER_POLICIES].append(policy)
def grabHiveRangerPoliciesFromDatabase(model):
    """Move Ranger policies embedded in Hive databases to the policy list.

    For every database carrying a RANGER_POLICY, the policy is bound to that
    database with "*" for both tables and columns, given a default name,
    inherits the database's NO_REMOVE flag and is appended to
    model[SRC][HIVE_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if HIVE_DATABASES not in source:
        return
    for database in source[HIVE_DATABASES]:
        if RANGER_POLICY not in database:
            continue
        policy = database[RANGER_POLICY]
        policy[DATABASES] = [database[NAME]]
        policy[TABLES] = ["*"]
        policy[COLUMNS] = ["*"]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(database[NAME]))
        policy[NO_REMOVE] = database[NO_REMOVE]
        misc.ensureObjectInMaps(source, [HIVE_RANGER_POLICIES], [])
        source[HIVE_RANGER_POLICIES].append(policy)
def grabHiveRangerPoliciesFromTables(model):
    """Move Ranger policies embedded in Hive tables to the policy list.

    For every table carrying a RANGER_POLICY, the policy is bound to the
    table's database and name with "*" for columns, given a default name,
    inherits the table's NO_REMOVE flag and is appended to
    model[SRC][HIVE_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if HIVE_TABLES not in source:
        return
    for table in source[HIVE_TABLES]:
        if RANGER_POLICY not in table:
            continue
        policy = table[RANGER_POLICY]
        policy[DATABASES] = [table[DATABASE]]
        policy[TABLES] = [table[NAME]]
        policy[COLUMNS] = ["*"]
        defaultName = DEFAULT_HIVE_TABLE_POLICY_NAME.format(table[DATABASE], table[NAME])
        misc.setDefaultInMap(policy, NAME, defaultName)
        policy[NO_REMOVE] = table[NO_REMOVE]
        misc.ensureObjectInMaps(source, [HIVE_RANGER_POLICIES], [])
        source[HIVE_RANGER_POLICIES].append(policy)
def groomHbaseRelay(model):
    """Default and validate the HBase relay, or drop it when unused.

    The relay is deleted from model[SRC] when neither an HBase namespace nor
    an HBase table is declared. Otherwise TOOLS_FOLDER and DEBUG get
    defaults, the Kerberos settings (principal / keytab paths) are checked
    for consistency, and KERBEROS and LOGS_USER are set on the relay.

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if HBASE_RELAY not in source:
        return

    def nothingDeclared(key):
        return key not in source or len(source[key]) == 0

    if nothingDeclared(HBASE_NAMESPACES) and nothingDeclared(HBASE_TABLES):
        # Optimization on execution time
        del source[HBASE_RELAY]
        return

    relay = source[HBASE_RELAY]
    if TOOLS_FOLDER not in relay:
        relay[TOOLS_FOLDER] = DEFAULT_TOOLS_FOLDER
    misc.setDefaultInMap(relay, DEBUG, False)
    if PRINCIPAL not in relay:
        if LOCAL_KEYTAB_PATH in relay or RELAY_KEYTAB_PATH in relay:
            misc.ERROR(
                "hbase_relay: Please, provide a 'principal' if you need to use a keytab"
            )
        relay[KERBEROS] = False
        if BECOME_USER in relay:
            relay[LOGS_USER] = relay[BECOME_USER]
        else:
            relay[LOGS_USER] = "{{ansible_user}}"
        return

    # Kerberos case: a principal needs at least one keytab location.
    if LOCAL_KEYTAB_PATH not in relay and RELAY_KEYTAB_PATH not in relay:
        misc.ERROR(
            "hbase_relay: Please provide a 'local_keytab_path' and/or a 'relay_keytab_path' if you want to use a Kerberos 'principal'"
        )
    relay[KERBEROS] = True
    if LOCAL_KEYTAB_PATH in relay:
        localKeytab = relay[LOCAL_KEYTAB_PATH]
        if not os.path.exists(localKeytab):
            misc.ERROR(
                "hbase_relay: local_keytab_file '{0}' does not exists!"
                .format(localKeytab))
        if RELAY_KEYTAB_PATH not in relay:
            # Derive the relay-side location from the tools folder.
            relay[_RELAY_KEYTAB_FOLDER_] = os.path.join(
                relay[TOOLS_FOLDER], "keytabs")
            relay[RELAY_KEYTAB_PATH] = os.path.join(
                relay[_RELAY_KEYTAB_FOLDER_],
                os.path.basename(relay[LOCAL_KEYTAB_PATH]))
    if BECOME_USER in relay:
        misc.ERROR(
            "hbase_relay: become_user and principal can't be defined both!"
        )
    relay[LOGS_USER] = "{{ansible_user}}"
def grabHdfsRangerPoliciesFromFiles(model):
    """Move Ranger policies embedded in files to the HDFS policy list.

    A file carrying a RANGER_POLICY must have the HDFS scope (reported
    through misc.ERROR otherwise). The policy gets RECURSIVE defaulted to
    False, is bound to the join of DEST_FOLDER and DEST_NAME, gets a default
    name, inherits the file's NO_REMOVE flag and is appended to
    model[SRC][HDFS_RANGER_POLICIES].

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if FILES not in source:
        return
    for entry in source[FILES]:
        if RANGER_POLICY not in entry:
            continue
        if entry[SCOPE] != HDFS:
            misc.ERROR("Can't setup Apache Ranger policy on file '{0}' as scope is not hdfs".format(os.path.join(entry[DEST_FOLDER], entry[DEST_NAME])))
        policy = entry[RANGER_POLICY]
        misc.setDefaultInMap(policy, RECURSIVE, False)
        # groomFiles should have been called before (DEST_NAME is needed)
        policy[PATHS] = [os.path.join(entry[DEST_FOLDER], entry[DEST_NAME])]
        misc.setDefaultInMap(policy, NAME, DEFAULT_POLICY_NAME.format(policy[PATHS][0]))
        policy[NO_REMOVE] = entry[NO_REMOVE]
        misc.ensureObjectInMaps(source, [HDFS_RANGER_POLICIES], [])
        source[HDFS_RANGER_POLICIES].append(policy)
def onGrooming(self):
    """Run the inventory grooming steps on the context's model.

    Resets model[DATA][INVENTORY], applies the 'when' handling to the host
    related lists, then runs the build / flatten / override / check /
    prepare steps in that order, and finally defaults EXIT_ON_FAIL to True.
    """
    model = self.context.model
    model[DATA][INVENTORY] = {}
    for listKey in (HOSTS, HOST_GROUPS, HOST_OVERRIDES, HOST_GROUP_OVERRIDES):
        misc.applyWhenOnList(model[SRC], listKey)
    # Order matters: each step is run on the result of the previous ones.
    buildHostDicts(model)
    flattenGroups(model)
    handleHostOverrides(model)
    handleHostGroupOverrides(model)
    check(model)
    prepareAnsibleModel(model)
    misc.setDefaultInMap(model[SRC], EXIT_ON_FAIL, True)
def onGrooming(self):
    """Run the files grooming steps on the context's model.

    Prepares the per-scope and HDFS containers in model[DATA], applies the
    'when' handling to the relevant source entries, grooms maven
    repositories, folders, files and trees, then handles scope exclusion and
    drops the HDFS relay when nothing is left to do on HDFS.
    """
    context = self.context
    model = context.model
    source = model[SRC]
    data = model[DATA]
    misc.ensureObjectInMaps(data, [FILES, SCOPE_BY_NAME], {})
    if HDFS in context.pluginByName:
        # We need to anticipate on works performed by hdfs plugin, as we need it right now
        for key, initial in ((NODE_TO_HDFS_BY_NAME, {}), (FILES, []),
                             (FOLDERS, []), (TREES, []),
                             (CACHEFOLDERS, Set())):
            misc.ensureObjectInMaps(data, [HDFS, key], initial)
        misc.applyWhenOnSingle(source, HDFS_RELAY)
        misc.applyWhenOnList(source, SOURCE_HOST_CREDENTIALS)
        if HDFS_RELAY in source:
            misc.setDefaultInMap(source[HDFS_RELAY], CACHE_FOLDER,
                                 DEFAULT_HDFS_RELAY_CACHE_FOLDER)
    for listKey in (MAVEN_REPOSITORIES, FOLDERS, FILES, TREES):
        misc.applyWhenOnList(source, listKey)
    groomMavenRepositories(context)
    groomFolders(context)
    groomFiles(context)
    groomTrees(context)
    # Handle scope exclusion
    if context.toExclude(SCOPE_FILES):
        scopeByName = data[FILES][SCOPE_BY_NAME]
        # Collect first: entries can't be deleted while iterating the map.
        excluded = [scope for scope in scopeByName if context.toExclude(scope)]
        for scope in excluded:
            del scopeByName[scope]
    if context.toExclude(SCOPE_HDFS) and HDFS in data:
        data[HDFS][FILES] = []
        data[HDFS][FOLDERS] = []
        data[HDFS][TREES] = []
        data[HDFS][NODE_TO_HDFS_BY_NAME] = {}
    if HDFS in data and all(
            len(data[HDFS][key]) == 0
            for key in (NODE_TO_HDFS_BY_NAME, FILES, FOLDERS, TREES)):
        # Optimization for execution time
        if HDFS_RELAY in source:
            del source[HDFS_RELAY]
    setWillUseMavenRepo(model)
    setServiceNotifications(context)
def groomKafkaRelay(model):
    """Default and validate the Kafka relay, or drop it when unused.

    The relay is deleted from model[SRC] when no Kafka topic is declared.
    Otherwise its ZK_HOST_GROUP must be a known host group, ZK_PORT defaults
    to 2181, ZK_PATH defaults to '/', every value of BROKER_ID_MAP must be
    an int, and LOGS_USER / TOOLS_FOLDER are derived from BECOME_USER when
    it is present.

    :param model: the model; model[SRC] is updated in place.
    """
    source = model[SRC]
    if KAFKA_RELAY not in source:
        return
    if KAFKA_TOPICS not in source or len(source[KAFKA_TOPICS]) == 0:
        del source[KAFKA_RELAY]
        return
    relay = source[KAFKA_RELAY]
    hostGroup = relay[ZK_HOST_GROUP]
    if hostGroup not in model[DATA][INVENTORY][HOST_GROUP_BY_NAME]:
        misc.ERROR(
            "kafka_relay: host_group '{0}' does not exists!".format(hostGroup))
    misc.setDefaultInMap(relay, ZK_PORT, 2181)
    if BROKER_ID_MAP in relay:
        # values() rather than the Python-2-only itervalues(), so the loop
        # runs unchanged on Python 2 and Python 3.
        for brokerId in relay[BROKER_ID_MAP].values():
            if not isinstance(brokerId, int):
                misc.ERROR(
                    "kafka_relay: BrokerId ({0}) must be integer".format(
                        brokerId))
    misc.setDefaultInMap(relay, ZK_PATH, '/')
    if BECOME_USER in relay:
        relay[LOGS_USER] = relay[BECOME_USER]
        misc.setDefaultInMap(
            relay, TOOLS_FOLDER,
            "/tmp/hadeploy_{}".format(relay[BECOME_USER]))
    else:
        relay[LOGS_USER] = "{{ansible_user}}"
        misc.setDefaultInMap(relay, TOOLS_FOLDER, DEFAULT_TOOLS_FOLDER)
def groomTmplFiles(f, model):
    """Groom a file entry whose source is a 'tmpl://' template.

    DEST_NAME defaults to the basename of the source and _TARGET_ is the
    normalized join of DEST_FOLDER and DEST_NAME. A relative template path
    is resolved through lookupInLocalTemplates(); an absolute one must
    exist. A folder is rejected as a template source.

    :param f: the file description, updated in place (_TARGET_,
        _DISPLAY_SRC_ and _SRC_ are set).
    :param model: the model, handed to lookupInLocalTemplates().
    """
    misc.setDefaultInMap(f, DEST_NAME, os.path.basename(f[FSRC]))
    f[_TARGET_] = os.path.normpath(os.path.join(f[DEST_FOLDER], f[DEST_NAME]))
    templatePath = f[FSRC][len('tmpl://'):]
    # The path as written by the user is kept for display.
    f[_DISPLAY_SRC_] = templatePath
    if templatePath.startswith("/"):
        if not os.path.exists(templatePath):
            misc.ERROR("'{0}' does not exists".format(templatePath))
    else:
        templatePath = lookupInLocalTemplates(templatePath, model)
    if os.path.isdir(templatePath):
        misc.ERROR(
            "Files: '{0}' is is a folder. Can't be a template source. Use 'trees' block to copy a folder in a recursive way"
            .format(f[FSRC]))
    f[_SRC_] = templatePath
def groomFiles(context):
    """Groom every file entry and register it under its scope.

    NO_REMOVE defaults to False, then the entry is handed to the groomer
    matching the scheme of its source ('file://', 'http(s)://', 'tmpl://',
    'node://', 'mvn://'). Files with the HDFS scope need an HDFS relay and
    are added to the HDFS file list with their cache target; other files
    are added to their host scope, or to the node-to-HDFS list when the
    groomer flagged them with NODE_TO_HDFS_FLAG.

    :param context: object providing 'model' and checkScope().
    """
    model = context.model
    source = model[SRC]
    if FILES not in source:
        return
    for f in source[FILES]:
        misc.setDefaultInMap(f, NO_REMOVE, False)
        if f[FSRC].startswith('file://'):
            groomFileFiles(f, model)
        elif f[FSRC].startswith(('http://', 'https://')):
            groomHttpFiles(f, model)
        elif f[FSRC].startswith('tmpl://'):
            groomTmplFiles(f, model)
        elif f[FSRC].startswith('node://'):
            groomNodeToHdfsFiles(f, model)
        elif f[FSRC].startswith('mvn://'):
            groomMavenFiles(f, model)
        else:
            misc.ERROR(
                "Files: {0} is not a valid form for 'src' attribute. Unknown scheme"
                .format(f[FSRC]))
        if f[SCOPE] == HDFS:
            if HDFS_RELAY not in source:
                # Report the file's own source attribute (FSRC), as the
                # other messages of this function do.
                misc.ERROR(
                    "Scope of file '{0}' is 'hdfs' while no hdfs_relay was defined!"
                    .format(f[FSRC]))
            else:
                model[DATA][HDFS][FILES].append(f)
                # This one is intended to be used in the cache
                f[_CACHE_TARGET_] = os.path.normpath(
                    source[HDFS_RELAY][CACHE_FOLDER] + "/" +
                    manglePath(f[_TARGET_]))
                model[DATA][HDFS][CACHEFOLDERS].add(
                    os.path.dirname(f[_CACHE_TARGET_]))
        elif not context.checkScope(f[SCOPE]):
            misc.ERROR(
                "File {0}: Scope attribute '{1}' does not match any host or host_group and is not 'hdfs'!"
                .format(f[FSRC], f[SCOPE]))
        elif NODE_TO_HDFS_FLAG in f:
            ensureHdfsScope(model, f[SCOPE])
            model[DATA][HDFS][NODE_TO_HDFS_BY_NAME][f[SCOPE]][FILES].append(f)
        else:
            ensureScope(model, f[SCOPE])
            model[DATA][FILES][SCOPE_BY_NAME][f[SCOPE]][FILES].append(f)
def handleHostOverrides(model):
    """Apply the declared host overrides, lowest PRIORITY value first.

    PRIORITY defaults to 100 on every override. An override named 'all' or
    '*' is applied to every declared host; any other name must match a host
    of HOST_BY_NAME. Problems are reported through misc.ERROR.

    :param model: the model; model[SRC] and the inventory are read, hosts
        are updated through handleHostOverride().
    """
    source = model[SRC]
    if HOST_OVERRIDES not in source:
        return
    overrides = source[HOST_OVERRIDES]
    for override in overrides:
        misc.setDefaultInMap(override, PRIORITY, 100)
    for override in sorted(overrides, key=lambda item: item[PRIORITY]):
        if override[NAME] in ('all', '*'):
            if HOSTS not in source:
                misc.ERROR("No host definition at all. Can't override")
            else:
                for host in source[HOSTS]:
                    handleHostOverride(host, override)
        else:
            hostByName = model[DATA][INVENTORY][HOST_BY_NAME]
            if override[NAME] not in hostByName:
                misc.ERROR("Trying to override unexisting host: '{0}'".format(override[NAME]))
            else:
                handleHostOverride(hostByName[override[NAME]], override)
def groomHiveDatabases(model):
    """Default and sanity-check every entry of the Hive databases list.

    Does nothing when model[SRC] holds no Hive database. Otherwise a Hive
    relay must be present, NO_REMOVE defaults to False, the 'default'
    database is refused, a LOCATION must be absolute, and OWNER and
    OWNER_TYPE must be given together.

    :param model: the model; model[SRC] is read and its database entries
        are updated in place.
    """
    source = model[SRC]
    if HIVE_DATABASES not in source or len(source[HIVE_DATABASES]) == 0:
        return
    if HIVE_RELAY not in source:
        misc.ERROR(
            'A hive_relay must be defined if at least one hive database is defined'
        )
    for database in source[HIVE_DATABASES]:
        misc.setDefaultInMap(database, NO_REMOVE, False)
        if database[NAME] == 'default':
            misc.ERROR("HIVE database 'default' can't be altered")
        if LOCATION in database and not database[LOCATION].startswith("/"):
            misc.ERROR("Database '{0}': Location must be absolute!".format(
                database[NAME]))
        hasOwner = OWNER in database
        hasOwnerType = OWNER_TYPE in database
        if hasOwner != hasOwnerType:
            misc.ERROR(
                "Database '{0}': If an owner is defined, then owner_type (USER|GROUP|ROLE) must be also!"
                .format(database[NAME]))
def groomHdfsRelay(model):
    """Default and validate the HDFS relay declared in model[SRC].

    The relay HOST must be a known host. With a PRINCIPAL, no USER may be
    given, a keytab location is required, KERBEROS is set to True, USER to
    "KERBEROS" and KDEBUG defaults to False. Without a PRINCIPAL, no keytab
    may be given, KERBEROS and KDEBUG are set to False and USER defaults to
    the relay host's ssh user ("hdfs" when that user is "root").

    :param model: the model; model[SRC][HDFS_RELAY] is updated in place.
    """
    if HDFS_RELAY not in model[SRC]:
        return
    relay = model[SRC][HDFS_RELAY]
    hostByName = model[DATA][INVENTORY][HOST_BY_NAME]
    if relay[HOST] not in hostByName:
        misc.ERROR("hdfs_relay: hosts '{0}' is undefined".format(relay[HOST]))
    # The CACHE_FOLDER default is not set here; it is performed in the file
    # plugin.
    if PRINCIPAL not in relay:
        if LOCAL_KEYTAB_PATH in relay or RELAY_KEYTAB_PATH in relay:
            misc.ERROR(
                "hdfs_relay: Please, provide a 'principal' if you need to use a keytab"
            )
        relay[KERBEROS] = False
        relay[KDEBUG] = False
        sshUser = hostByName[relay[HOST]][SSH_USER]
        if sshUser == "root":
            defaultUser = "hdfs"
        else:
            defaultUser = sshUser
        misc.setDefaultInMap(relay, USER, defaultUser)
        return

    if USER in relay:
        misc.ERROR(
            "hdfs_relay: If a 'principal' is defined, then no 'user' should be defined, as all operations will be performed on behalf of the 'principal'"
        )
    if LOCAL_KEYTAB_PATH not in relay and RELAY_KEYTAB_PATH not in relay:
        misc.ERROR(
            "hdfs_relay: Please provide a 'local_keytab_path' and/or a 'relay_keytab_path' if you want to use a Kerberos 'principal'"
        )
    if LOCAL_KEYTAB_PATH in relay:
        if not os.path.exists(relay[LOCAL_KEYTAB_PATH]):
            misc.ERROR(
                "hdfs_relay: local_keytab_file '{0}' does not exists!".format(
                    relay[LOCAL_KEYTAB_PATH]))
        if RELAY_KEYTAB_PATH not in relay:
            # Derive the relay-side location from the default keytabs folder.
            relay[_RELAY_KEYTAB_FOLDER_] = DEFAULT_HDFS_KEYTABS_FOLDER
            relay[RELAY_KEYTAB_PATH] = os.path.join(
                relay[_RELAY_KEYTAB_FOLDER_],
                os.path.basename(relay[LOCAL_KEYTAB_PATH]))
    relay[KERBEROS] = True
    relay[USER] = "KERBEROS"
    misc.setDefaultInMap(relay, KDEBUG, False)
def groomUsers(context):
    """Default, validate and register every declared user under its scope.

    NO_REMOVE defaults to False, SCOPE to "all" and MANAGED to True. An
    unmanaged user may not define account attributes; a managed one gets
    SYSTEM defaulted to False and CREATE_HOME to True. The user is then
    appended to the USERS list of its scope.

    :param context: object providing 'model' and checkScope().
    """
    model = context.model
    if USERS not in model[SRC]:
        return
    # Attributes that make no sense on a user we do not manage.
    managedOnly = (SYSTEM, CREATE_HOME, 'group', GROUPS, 'password', 'comment')
    for user in model[SRC][USERS]:
        for key, default in ((NO_REMOVE, False), (SCOPE, "all"), (MANAGED, True)):
            misc.setDefaultInMap(user, key, default)
        if user[MANAGED]:
            misc.setDefaultInMap(user, SYSTEM, False)
            misc.setDefaultInMap(user, CREATE_HOME, True)
        elif any(key in user for key in managedOnly):
            misc.ERROR("User '{0}': When not managed, 'system', 'create_home', 'group', 'groups', 'comment' or 'password' attributes can't be defined!".format(user['login']))
        if not context.checkScope(user[SCOPE]):
            misc.ERROR("User {0}: Scope attribute '{1}' does not match any host or host_group!".format(user['login'], user[SCOPE]))
        # Operations are grouped per scope, to optimize the ansible run.
        # Note: if a scope exists, it must hold both 'users' and 'groups'.
        ensureScope(context, user[SCOPE])
        model[DATA][USERS][SCOPE_BY_NAME][user[SCOPE]][USERS].append(user)
def groomTrees(context):
    """Groom every tree entry and register it under its scope.

    NO_REMOVE defaults to False. A tree with the HDFS scope needs an HDFS
    relay and gets its cache target computed before the scheme-specific
    groomer ('file://', 'tmpl://' or 'node://') runs. The tree is then added
    to the HDFS tree list, to its host scope, or to the node-to-HDFS list
    when the groomer flagged it with NODE_TO_HDFS_FLAG.

    :param context: object providing 'model' and checkScope().
    """
    model = context.model
    source = model[SRC]
    if TREES not in source:
        return
    for tree in source[TREES]:
        misc.setDefaultInMap(tree, NO_REMOVE, False)
        if tree[SCOPE] == HDFS:
            if HDFS_RELAY in source:
                # Need to setup cache now
                tree[_CACHE_TARGET_] = os.path.normpath(
                    source[HDFS_RELAY][CACHE_FOLDER] + "/" +
                    manglePath(tree[DEST_FOLDER]))
                model[DATA][HDFS][CACHEFOLDERS].add(
                    os.path.dirname(tree[_CACHE_TARGET_]))
            else:
                misc.ERROR(
                    "Scope of tree '{0}' is 'hdfs' while no hdfs_relay was defined!"
                    .format(tree[FSRC]))
        if tree[FSRC].startswith('file://'):
            groomFileTrees(tree, model)
        elif tree[FSRC].startswith('tmpl://'):
            groomTmplTrees(tree, model)
        elif tree[FSRC].startswith('node://'):
            groomNodeToHdfsTrees(tree, model)
        else:
            misc.ERROR(
                "Tree: {0} is not a valid form for 'src' attribute. Unknown scheme"
                .format(tree[FSRC]))
        # The groomer may have replaced the scope, so it is read again here.
        if tree[SCOPE] == HDFS:
            model[DATA][HDFS][TREES].append(tree)
        elif not context.checkScope(tree[SCOPE]):
            misc.ERROR(
                "Tree {0}: Scope attribute '{1}' does not match any host or host_group and is not 'hdfs'!"
                .format(tree[FSRC], tree[SCOPE]))
        elif NODE_TO_HDFS_FLAG in tree:
            ensureHdfsScope(model, tree[SCOPE])
            model[DATA][HDFS][NODE_TO_HDFS_BY_NAME][tree[SCOPE]][TREES].append(tree)
        else:
            ensureScope(model, tree[SCOPE])
            model[DATA][FILES][SCOPE_BY_NAME][tree[SCOPE]][TREES].append(tree)
def groomGroups(self):
    """Attach every supervisor group to its supervisor and to its scope.

    Skipped entirely when SCOPE_SUPERVISOR is excluded. Each group must
    refer to a registered supervisor and to programs known by that
    supervisor; every program's _NAME_ is prefixed with '<group>:' and all
    programs of a group must share one scope, which becomes the group's
    _SCOPE_. Configuration file locations and ownership are copied from the
    supervisor, then the group is appended to the per-scope GROUPS_TO_MANAGE
    list (plus GROUPS_TO_REMOVE unless NO_REMOVE is set).
    """
    context = self.context
    if context.toExclude(SCOPE_SUPERVISOR):
        return
    model = context.model
    if SUPERVISOR_GROUPS not in model[SRC]:
        return
    for grp in model[SRC][SUPERVISOR_GROUPS]:
        supervisors = model[DATA][SUPERVISORS]
        if SUPERVISOR_BY_NAME not in supervisors or grp[SUPERVISOR] not in supervisors[SUPERVISOR_BY_NAME]:
            misc.ERROR("supervisor_group '{}' refer to an undefined supervisor '{}'".format(grp[NAME], grp[SUPERVISOR]))
            continue
        supervisord = supervisors[SUPERVISOR_BY_NAME][grp[SUPERVISOR]]
        if grp[NAME] in supervisord[GROUP_BY_NAME]:
            misc.ERROR("supervisor_group '{}' is defined twice in supervisor '{}'".format(grp[NAME], supervisord[NAME]))
        supervisord[GROUP_BY_NAME][grp[NAME]] = grp
        for prgName in grp[PROGRAMS]:
            if prgName not in supervisord[PROGRAM_BY_NAME]:
                misc.ERROR("supervisor_group '{}' refer to an undefined program '{}'".format(grp[NAME], prgName))
                continue
            prg = supervisord[PROGRAM_BY_NAME][prgName]
            # The program name must be patched with the group prefix.
            prg[_NAME_] = grp[NAME] + ":" + prg[_NAME_]
            if _SCOPE_ not in grp:
                # The first program fixes the scope of the whole group.
                grp[_SCOPE_] = prg[SCOPE]
            elif grp[_SCOPE_] != prg[SCOPE]:
                misc.ERROR("supervisor_group '{}' host programs with different scope ({} != {}). Must be same".format(grp[NAME], grp[_SCOPE_], prg[SCOPE]))
        if _SCOPE_ not in grp:
            # No valid program set the scope: report it instead of failing
            # below with a bare KeyError on grp[_SCOPE_].
            misc.ERROR("supervisor_group '{}' must refer to at least one program".format(grp[NAME]))
        misc.setDefaultInMap(grp, NO_REMOVE, False)
        if grp[NO_REMOVE] and not supervisord[NO_REMOVE]:
            misc.ERROR("Supervisor_group '{}' has no remove flag set while its supervisor ({}) has not!".format(grp[NAME], supervisord[NAME]))
        grp[CONF_FILE_SRC_JJ2] = os.path.join(self.path, "templates/group.conf.jj2")
        grp[CONF_FILE_SRC_J2] = "supervisor_{}_group_{}.conf".format(supervisord[NAME], grp[NAME])
        grp[CONF_FILE_DST] = os.path.join(supervisord[INCLUDE_DIR], "{}_grp.ini".format(grp[NAME]))
        grp[SUPERVISOR_OWNER] = supervisord[USER]
        grp[SUPERVISOR_GROUP] = supervisord[GROUP]
        grp[SUPERVISOR_CONF] = supervisord[CONF_FILE_DST]
        grp[_NAME_] = grp[NAME] + ":"
        # ---------------------- Insert in scope
        byScope = model[DATA][SUPERVISORS][SCOPE_BY_NAME]
        misc.ensureObjectInMaps(byScope, [grp[_SCOPE_], GROUPS_TO_MANAGE], [])
        byScope[grp[_SCOPE_]][GROUPS_TO_MANAGE].append(grp)
        if not grp[NO_REMOVE]:
            misc.ensureObjectInMaps(byScope, [grp[_SCOPE_], GROUPS_TO_REMOVE], [])
            byScope[grp[_SCOPE_]][GROUPS_TO_REMOVE].append(grp)
def groomFolders(context):
    """Default every folder entry and register it under its scope.

    NO_REMOVE defaults to False. A folder with the HDFS scope needs an HDFS
    relay and is appended to the HDFS folder list; any other scope must be
    accepted by context.checkScope() and the folder is appended to that
    scope's FOLDERS list.

    :param context: object providing 'model' and checkScope().
    """
    model = context.model
    if FOLDERS not in model[SRC]:
        return
    for folder in model[SRC][FOLDERS]:
        misc.setDefaultInMap(folder, NO_REMOVE, False)
        scopeName = folder[SCOPE]
        if scopeName == HDFS:
            if HDFS_RELAY in model[SRC]:
                model[DATA][HDFS][FOLDERS].append(folder)
            else:
                misc.ERROR(
                    "Scope of folder '{0}' is 'hdfs' while no hdfs_relay was defined!"
                    .format(folder['path']))
        elif context.checkScope(scopeName):
            ensureScope(model, scopeName)
            model[DATA][FILES][SCOPE_BY_NAME][scopeName][FOLDERS].append(folder)
        else:
            misc.ERROR(
                "Folder {0}: Scope attribute '{1}' does not match any host or host_group and is not 'hdfs'!"
                .format(folder['path'], scopeName))
def groomGroups(context):
    """Default every declared group and register it under its scope.

    SYSTEM defaults to False, MANAGED to True, SCOPE to "all" and NO_REMOVE
    to False. The scope must be accepted by context.checkScope(); the group
    is then appended to the GROUPS list of its scope.

    :param context: object providing 'model' and checkScope().
    """
    model = context.model
    if GROUPS not in model[SRC]:
        return
    defaults = ((SYSTEM, False), (MANAGED, True), (SCOPE, "all"), (NO_REMOVE, False))
    for group in model[SRC][GROUPS]:
        for key, value in defaults:
            misc.setDefaultInMap(group, key, value)
        if not context.checkScope(group[SCOPE]):
            misc.ERROR("Group {0}: Scope attribute '{1}' does not match any host or host_group!".format(group['name'], group[SCOPE]))
        # Operations are grouped per scope, to optimize the ansible run.
        # Note: if a scope exists, it must hold both 'users' and 'groups'.
        ensureScope(context, group[SCOPE])
        model[DATA][USERS][SCOPE_BY_NAME][group[SCOPE]][GROUPS].append(group)