def validate(model, schema):
    """Check 'model' against the kwalify 'schema', reporting any problem through misc.ERROR."""
    checker = kwalify(source_data=model, schema_data=schema)
    try:
        checker.validate(raise_exception=False)
        if checker.errors:
            misc.ERROR("Problem {0}".format(checker.errors))
    except Exception as e:
        # Despite raise_exception=False, some cases still raise (e.g. map/list mismatch),
        # so they must be caught and funneled through the same error path.
        misc.ERROR("Problem {0}".format(e))
def setScalar(self, event):
    """Convert a YAML scalar event into a typed Python value.

    The scalar text goes through up to two jinja2 substitution passes over
    self.vars: one with ${var}/%{...}/#{...} delimiters (triggered when the
    text contains '{'), then one with <<var>>/%<...>>/#<...>> delimiters
    (triggered when it contains '>>'). Unless the scalar was explicitly
    quoted in the source (style ' or "), the result is then coerced:
    int, then float, then boolean via trueRegex/falseRegex, falling back
    to the raw string. Template errors abort through misc.ERROR with the
    source position and current path.
    """
    logger.debug("setScalar({0}) start_mark:{1} end_mark:{2} style:{3}".format(event, event.start_mark, event.end_mark, event.style))
    value = event.value
    if '{' in value:
        # First substitution pass: ${var} variables, %{...} blocks, #{...} comments.
        thevars = self.vars
        #logger.debug("*********Will pass '{0}' through jinja2 with {1}".format(value, str(thevars)))
        try:
            value = jinja2.Template(value, undefined=jinja2.StrictUndefined, trim_blocks=True, block_start_string="%{", block_end_string="}", variable_start_string="${", variable_end_string="}", comment_start_string="#{", comment_end_string="}").render(thevars)
        except Exception as e:
            misc.ERROR("{0} {1} (path:{2})".format(str(e), event.start_mark, str(self.path)))
        ##logger.debug("*********Result is {0}".format(value))
    if '>>' in value:
        # Second substitution pass: <<var>> variables, %<...>> blocks, #<...>> comments.
        thevars = self.vars
        #logger.debug("*********Will pass '{0}' through jinja2 with {1}".format(value, str(thevars)))
        try:
            value = jinja2.Template(value, undefined=jinja2.StrictUndefined, trim_blocks=True, block_start_string="%<", block_end_string=">>", variable_start_string="<<", variable_end_string=">>", comment_start_string="#<", comment_end_string=">>").render(thevars)
        except Exception as e:
            misc.ERROR("{0} {1} (path:{2})".format(str(e), event.start_mark, str(self.path)))
        ##logger.debug("*********Result is {0}".format(value))
    # NOTE(review): encode() on the rendered text implies Python 2 str/unicode
    # semantics — on Python 3 this would produce bytes and break int()/regex
    # matching below; confirm the target interpreter.
    value = value.encode('utf8')
    if event.style == u'"' or event.style == u'\'':
        # Explicitly quoted in the source: always keep it as a string.
        return value
    else:
        # Unquoted: try the most specific type first.
        try:
            return int(value)
        except:
            try:
                return float(value)
            except:
                if trueRegex.match(value):
                    return True
                elif falseRegex.match(value):
                    return False
                else:
                    return value
def getFileFromHdfs(self, hdfsPath, localPath, overwrite):
    """Download an HDFS file to the local filesystem through the WebHDFS OPEN operation.

    hdfsPath:  source path on HDFS.
    localPath: destination path on the local filesystem.
    overwrite: if False and localPath already exists, abort through misc.ERROR.
    """
    logger.debug("getFileFromHdfs(localPath={0}, hdfsPath={1})".format(localPath, hdfsPath))
    if os.path.exists(localPath) and not overwrite:
        misc.ERROR("Local file {0} already exists. Will not overwrite it!".format(localPath))
    url = "http://{0}/webhdfs/v1{1}?{2}op=OPEN".format(self.endpoint, hdfsPath, self.auth)
    # stream=True so the body is fetched chunk by chunk instead of fully in memory.
    resp = requests.get(url, allow_redirects=True, stream=True)
    logger.debug(url + " -> " + str(resp.status_code))
    if not resp.status_code == 200:
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp.status_code, url))
    # Fix: open the local file only once the request is known to have succeeded
    # (the original opened it first, creating an empty file and leaking the
    # handle on the error path), and use 'with' so it is always closed.
    with open(localPath, "wb") as f:
        for chunk in resp.iter_content(chunk_size=10240, decode_unicode=False):
            f.write(chunk)
def putFileToHdfs(self, localPath, hdfsPath, overwrite):
    """Upload a local file to HDFS through the WebHDFS CREATE two-step protocol.

    Step 1: PUT op=CREATE on the namenode, which must answer 307 with the
    datanode URL in the 'location' header. Step 2: PUT the file content to
    that URL, expecting 201. Any other status aborts through misc.ERROR.
    """
    logger.debug("putFileToHdfs(localPath={0}, hdfsPath={1})".format(localPath, hdfsPath))
    url = "http://{0}/webhdfs/v1{1}?{2}op=CREATE&overwrite={3}".format(self.endpoint, hdfsPath, self.auth, "true" if overwrite else "false")
    # allow_redirects=False: the 307 redirect must be handled manually so the
    # file body is only sent to the datanode, not to the namenode.
    resp = requests.put(url, allow_redirects=False)
    logger.debug(url + " -> " + str(resp.status_code))
    if not resp.status_code == 307:
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp.status_code, url))
    url2 = resp.headers['location']
    logger.debug(url2)
    # Fix: the original opened the file without ever closing it; 'with'
    # guarantees the handle is released even if the upload raises.
    with open(localPath, "rb") as f:
        resp2 = requests.put(url2, data=f, headers={'content-type': 'application/octet-stream'})
    logger.debug(url2 + " -> " + str(resp2.status_code))
    if not resp2.status_code == 201:
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp2.status_code, url2))
def lookup(p):
    """Locate and return a working WebHDFS client.

    If p.webhdfsEndpoint is set, its comma-separated endpoints are probed.
    Otherwise hdfs-site.xml under p.hadoopConfDir is parsed for the namenode
    HTTP address properties and those values are probed. The first endpoint
    whose WebHDFS.test() succeeds wins; p.webhdfsEndpoint is updated to it.
    Every failure path aborts through misc.ERROR.
    """
    def probe(candidates):
        # Try each candidate endpoint in turn. Returns (client, errors):
        # client is the first working WebHDFS instance (None if none worked),
        # errors collects the per-endpoint failure messages.
        errors = []
        for endpoint in candidates:
            webHDFS = WebHDFS(endpoint, p.hdfsUser)
            (x, err) = webHDFS.test()
            if x:
                p.webhdfsEndpoint = webHDFS.endpoint
                return (webHDFS, errors)
            else:
                errors.append(err)
        return (None, errors)

    if p.webhdfsEndpoint == None:
        if not os.path.isdir(p.hadoopConfDir):
            misc.ERROR("{0} must be an existing folder, or --hadoopConfDir or --webhdfsEndpoint provided as parameter.".format(p.hadoopConfDir))
        candidates = []
        hspath = os.path.join(p.hadoopConfDir, "hdfs-site.xml")
        NN_HTTP_TOKEN1 = "dfs.namenode.http-address"
        NN_HTTP_TOKEN2 = "dfs.http.address"  # Deprecated
        if os.path.isfile(hspath):
            # Collect every namenode HTTP address declared in hdfs-site.xml.
            doc = minidom.parse(hspath)
            for prop in doc.getElementsByTagName("property"):
                name = prop.getElementsByTagName("name")[0].childNodes[0].data
                if name.startswith(NN_HTTP_TOKEN1) or name.startswith(NN_HTTP_TOKEN2):
                    candidates.append(prop.getElementsByTagName("value")[0].childNodes[0].data)
            if not candidates:
                # NOTE(review): message args passed separately — presumably
                # misc.ERROR formats them itself; kept as in the original.
                misc.ERROR("Unable to find {0}* or {1}* in {2}. Provide explicit 'webhdfs_endpoint'", NN_HTTP_TOKEN1, NN_HTTP_TOKEN2, hspath)
            (webHDFS, errors) = probe(candidates)
            if webHDFS is not None:
                return webHDFS
            misc.ERROR("Unable to find a valid 'webhdfs_endpoint' in hdfs-site.xml:" + str(errors))
        else:
            misc.ERROR("Unable to find file {0}. Provide 'webhdfs_endpoint' or 'hadoop_conf_dir' parameter", hspath)
    else:
        (webHDFS, errors) = probe(p.webhdfsEndpoint.split(","))
        if webHDFS is not None:
            return webHDFS
        misc.ERROR("Unable to find a valid 'webhdfs_endpoint' in: " + str(p.webhdfsEndpoint) + " (" + str(errors) + ")")
def __init__(self, endpoint, hdfsUser):
    """Build a WebHDFS client for 'endpoint'.

    hdfsUser is either a plain user name (appended as 'user.name=...' to
    every request), or the reserved value "KERBEROS" to switch to Kerberos
    authentication (requires the python-requests-kerberos package).
    """
    self.endpoint = endpoint
    self.delegationToken = None
    self.auth = None
    self.kerberos = (hdfsUser == "KERBEROS")
    if self.kerberos:
        if not HAS_KERBEROS:
            misc.ERROR("'python-requests-kerberos' package is not installed")
    else:
        self.auth = "user.name=" + hdfsUser + "&"
def loadPlugin(self, name, paths):
    """Locate plugin 'name' in 'paths' and register it in the context.

    The first folder <path>/<name> found wins. If it holds a code.py, that
    module is loaded and scanned for a class deriving directly from Plugin,
    which is instantiated; otherwise a bare Plugin instance is created.
    The plugin is appended to self.plugins and indexed in self.pluginByName.
    Aborts through misc.ERROR if no folder matches or code.py holds no
    Plugin subclass.
    """
    for p in paths:
        path = os.path.join(p, name)
        if os.path.isdir(path):
            codeFile = os.path.join(path, "code.py")
            if os.path.isfile(codeFile):
                ##logger.debug("Plugin '{0}': Will load code from '{1}'".format(name, codeFile))
                module = imp.load_source(name, codeFile)
                # Scan the loaded module for a class whose direct base is Plugin.
                pluginClass = None
                for _, obj in inspect.getmembers(module):
                    if inspect.isclass(obj):
                        #logger.debug("Name: {0} Obj:{1}".format('className', obj))
                        bases = obj.__bases__
                        for base in bases:
                            if base == Plugin:
                                pluginClass = obj
                if pluginClass == None:
                    # NOTE(review): the code below relies on misc.ERROR never
                    # returning; otherwise 'plugin' would be unbound here.
                    misc.ERROR(
                        "Invalid plugin '{0}' code.py: Missing MyPlugin(Plugin) class"
                        .format(name))
                else:
                    #logger.debug("Plugin '{0}': Found class {1}".format(name, str(pluginClass)))
                    plugin = pluginClass(name, path, self)
                    logger.debug(
                        "Loaded plugin '{0}' with 'code.py' module (path:'{1}')"
                        .format(name, path))
            else:
                # Plugin without code (Impossible since plugin refactoring. Kept in case)
                logger.debug(
                    "Loaded plugin '{0}' without 'code.py' module (path:'{1}')"
                    .format(name, path))
                plugin = Plugin(name, path, self)
            self.plugins.append(plugin)
            self.pluginByName[plugin.name] = plugin
            return
    misc.ERROR(
        "Unable to find a plugin of name '{0}' in plugin paths {1}".format(
            name, paths))
def getDirContent(self, path):
    """List an HDFS directory through the WebHDFS LISTSTATUS operation.

    Returns a dict with 'status' ("OK", "NOT_FOUND" or "NO_ACCESS") plus
    'files' and 'directories' lists of attribute dicts (name, mode, owner,
    group; files also carry size and modificationTime in seconds).
    Any other HTTP status aborts through misc.ERROR.
    """
    url = "http://{0}/webhdfs/v1{1}?{2}op=LISTSTATUS".format(self.endpoint, path, self.auth)
    resp = requests.get(url)
    logger.debug(url + " -> " + str(resp.status_code))
    dirContent = {'status': "OK", 'files': [], 'directories': []}
    if resp.status_code == 200:
        result = resp.json()
        #print misc.pprint2s(result)
        for entry in result['FileStatuses']['FileStatus']:
            # Attributes shared by files and directories.
            item = {
                'name': entry['pathSuffix'],
                'mode': "0" + entry['permission'],
                'owner': entry['owner'],
                'group': entry['group'],
            }
            if entry['type'] == 'FILE':
                item['size'] = entry['length']
                # WebHDFS reports milliseconds; convert to seconds.
                item['modificationTime'] = entry['modificationTime']/1000
                dirContent['files'].append(item)
            elif entry['type'] == 'DIRECTORY':
                dirContent['directories'].append(item)
            else:
                misc.ERROR("Unknown directory entry type: {0}".format(entry['type']))
    elif resp.status_code == 404:
        dirContent['status'] = "NOT_FOUND"
    elif resp.status_code == 403:
        dirContent['status'] = "NO_ACCESS"
    else:
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp.status_code, url))
    return dirContent
def getPathTypeAndStatus(self, path):
    """Fetch type and attributes of an HDFS path through WebHDFS GETFILESTATUS.

    Returns a (type, attributes) tuple: type is the WebHDFS file type string,
    or "NOT_FOUND" / "NO_ACCESS" (with attributes None) on 404 / 403.
    Any other non-200 status aborts through misc.ERROR.
    """
    url = "http://{0}/webhdfs/v1{1}?{2}op=GETFILESTATUS".format(self.endpoint, path, self.auth)
    resp = requests.get(url)
    logger.debug(url + " -> " + str(resp.status_code))
    if resp.status_code == 200:
        status = resp.json()['FileStatus']
        attributes = {
            'size': status['length'],
            # WebHDFS reports milliseconds; convert to seconds.
            'modificationTime': status['modificationTime']/1000,
            'mode': "0" + status['permission'],
            'owner': status['owner'],
            'group': status['group'],
        }
        return (status['type'], attributes)
    elif resp.status_code == 404:
        return ("NOT_FOUND", None)
    elif resp.status_code == 403:
        return ("NO_ACCESS", None)
    else:
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp.status_code, url))
def parse(self, fileName):
    """Parse a source file (or a CLI 'name=value' variable) into the model.

    Event-driven YAML parsing: yaml.parse() is consumed event by event and
    a small state machine (self.state, driven through self.path) rebuilds
    the document tree, handling anchors/aliases, 'include' entries
    (recursive parse) and the top-level 'vars' map (captured into
    self.vars for later jinja2 substitution).

    fileName may be:
      - None:         an empty document holding an empty 'vars' map is parsed.
      - "name=value": a synthesized document holding that single variable.
      - a file path:  the YAML file itself; its folder becomes the base for
                      resolving relative 'include' paths.
    """
    if fileName == None:
        # No file: synthesize a minimal document with an empty 'vars' map.
        initVars = {}
        initVars["vars"] = {}
        stream = yaml.dump(initVars)
    elif "=" in fileName:
        # CLI-provided variable: synthesize a document holding just it.
        clivars = {}
        clivars['vars'] = {}
        x = fileName.split("=")
        if len(x) != 2:
            misc.ERROR(
                "Invalid variable syntax: '{0}'. Must be name=value (without space)"
                .format(fileName))
        clivars['vars'][x[0]] = x[1]
        stream = yaml.dump(clivars)
    else:
        if not os.path.isfile(fileName):
            misc.ERROR("'{0}' is not a readable file!".format(fileName))
        # Base folder used below to resolve relative 'include' paths.
        location = os.path.dirname(os.path.realpath(fileName))
        logger.debug(
            "----------------------- Will parse file '{0}' (location:'{1}')"
            .format(fileName, location))
        stream = open(fileName)
    for event in yaml.parse(stream):
        if isinstance(event, yaml.events.StreamStartEvent):
            logger.debug("--- StreamStartEvent")
            pass
        elif isinstance(event, yaml.events.DocumentStartEvent):
            logger.debug("--- DocumentStartEvent")
        elif isinstance(event, yaml.events.MappingStartEvent):
            logger.debug("--- MappingStartEvent:" + str(event))
            if self.state == State.WAITING_FOR_TYPE:
                # The pending path item turns out to be a map.
                self.path.setTopType(PathItem.MAP)
                if self.path.len() == 2 and self.path.top().name == u'vars':
                    # Top-level 'vars' map: keep a reference for substitutions.
                    logger.debug("*** vars ({0}) looked up!".format(
                        self.path.top().object))
                    self.vars = self.path.top().object
                if event.anchor != None:
                    # Remember the anchored object for later alias references.
                    logger.debug(
                        "************************************** ANCHOR {0}"
                        .format(event.anchor))
                    self.anchors[event.anchor] = self.path.top().object
                self.setState(State.WAITING_FOR_MAP_ENTRY)
            elif self.state == State.BETWEEN_DOCS:
                # New document merged onto the previous one: types must match.
                if self.path.top().type != PathItem.MAP:
                    misc.ERROR(
                        "Can't merge to document of different type (First is a MAP while other is a SEQ)"
                    )
                else:
                    self.setState(State.WAITING_FOR_MAP_ENTRY)
            elif self.state == State.WAITING_FOR_SEQ_ENTRY:
                # A map in a sequence
                self.path.add(PathItem("?", PathItem.MAP))
                if event.anchor != None:
                    logger.debug(
                        "************************************** ANCHOR {0}"
                        .format(event.anchor))
                    self.anchors[event.anchor] = self.path.top().object
                self.setState(State.WAITING_FOR_MAP_ENTRY)
            else:
                self.invalidEventError(event)
        elif isinstance(event, yaml.events.MappingEndEvent):
            logger.debug("--- MappingEndEvent")
            if self.state == State.WAITING_FOR_MAP_ENTRY:
                if self.path.len() > 1:
                    # Nested map completed: register it and pop the path.
                    self.register(fileName)
                    self.path.reduce()
                    self.adjustStateFromTop()
                else:
                    logger.debug("Found MAP root")
                    self.setState(State.BETWEEN_DOCS)
            else:
                self.invalidEventError(event)
        elif isinstance(event, yaml.events.SequenceStartEvent):
            logger.debug("--- SequenceStartEvent:" + str(event))
            if self.state == State.WAITING_FOR_TYPE:
                # The pending path item turns out to be a sequence.
                self.path.setTopType(PathItem.SEQ)
                self.setState(State.WAITING_FOR_SEQ_ENTRY)
            elif self.state == State.WAITING_FOR_SEQ_ENTRY:
                # A sequence in a sequence
                self.path.add(PathItem("?", PathItem.SEQ))
                self.setState(State.WAITING_FOR_SEQ_ENTRY)
            elif self.state == State.BETWEEN_DOCS:
                if self.path.top().type != PathItem.SEQ:
                    misc.ERROR(
                        "Can't merge to document of different type (First is a SEQ while other is a MAP)"
                    )
                else:
                    self.setState(State.WAITING_FOR_SEQ_ENTRY)
            else:
                self.invalidEventError(event)
        elif isinstance(event, yaml.events.SequenceEndEvent):
            logger.debug("--- SequenceEndEvent")
            if self.state == State.WAITING_FOR_SEQ_ENTRY:
                if self.path.len() > 1:
                    # Nested sequence completed: register it and pop the path.
                    self.register(fileName)
                    self.path.reduce()
                    self.adjustStateFromTop()
                else:
                    logger.debug("Found SEQ root")
                    self.setState(State.BETWEEN_DOCS)
            else:
                self.invalidEventError(event)
        elif isinstance(event, yaml.events.ScalarEvent):
            logger.debug("--- ScalarEvent:" + str(event))
            if self.state == State.WAITING_FOR_MAP_ENTRY:
                # Scalar in a map context is a key; its value type is unknown yet.
                self.path.add(PathItem(event.value, PathItem.UNKNOWN))
                self.setState(State.WAITING_FOR_TYPE)
            elif self.state == State.WAITING_FOR_SEQ_ENTRY:
                # Scalar as a sequence item.
                self.path.add(PathItem("?", PathItem.SCALAR))
                self.path.top().object = self.setScalar(event)
                self.register(fileName)
                if self.path.len() == 3 and self.path.path[1].name == "include":
                    # Item of a top-level 'include:' list: parse the included
                    # file in place. NOTE(review): 'location' is only bound when
                    # parsing an actual file — presumably 'include' never occurs
                    # in the synthesized (None / name=value) documents; confirm.
                    included = os.path.join(location, self.path.top().object)
                    logger.debug(
                        "********************* Path:'{0}' -> INCLUDE '{1}' from SEQ"
                        .format(repr(self.path), included))
                    self.setState(State.BETWEEN_DOCS)
                    self.path.reduce()
                    self.path.reduce()
                    self.adjustRelativePath(fileName)
                    self.parse(included)
                    self.path.add(PathItem("include", PathItem.SEQ))
                    logger.debug(
                        "********************* Path:'{0}' Back from include '{1}'"
                        .format(repr(self.path), included))
                    self.setState(State.WAITING_FOR_SEQ_ENTRY)  # No change
                else:
                    self.path.reduce()
                    self.setState(State.WAITING_FOR_SEQ_ENTRY)  # No change
            elif self.state == State.WAITING_FOR_TYPE:
                # Scalar as a map value.
                self.path.top().type = PathItem.SCALAR
                self.path.top().object = self.setScalar(event)
                self.register(fileName)
                if self.path.len() == 2 and self.path.path[1].name == "include":
                    # Single top-level 'include: <file>' entry: parse it in place.
                    included = os.path.join(location, self.path.top().object)
                    logger.debug(
                        "********************* Path:'{0}' -> INCLUDE '{1}' from SINGLE"
                        .format(repr(self.path), included))
                    self.path.reduce()
                    self.setState(State.BETWEEN_DOCS)
                    self.adjustRelativePath(fileName)
                    self.parse(included)
                    logger.debug(
                        "********************* Path:{0} Back from include '{1}'"
                        .format(repr(self.path), included))
                    self.adjustStateFromTop()
                elif self.path.len() == 2 and self.path.path[1].name == "vars":
                    # Detect the case where there is a 'vars:' block without any
                    # entry. In such a case, it is interpreted as a scalar and
                    # would hide other variables.
                    misc.ERROR(
                        "'vars' entry must be a map!"
                    )
                else:
                    self.path.reduce()
                    self.adjustStateFromTop()
            else:
                self.invalidEventError(event)
        elif isinstance(event, yaml.events.AliasEvent):
            logger.debug("--- AliasEvent:" + str(event))
            logger.debug("Path:'{0}'".format(repr(self.path)))
            if event.anchor not in self.anchors:
                misc.ERROR("Alias &{0} not found ({1})".format(
                    event.anchor, event.start_mark))
            else:
                # Graft the previously anchored object at the current path.
                self.path.reduceWithAliasMap(self.anchors[event.anchor])
                self.adjustStateFromTop()
        elif isinstance(event, yaml.events.DocumentEndEvent):
            logger.debug("--- DocumentEndEvent")
            pass
        elif isinstance(event, yaml.events.StreamEndEvent):
            logger.debug("--- StreamEndEvent")
            pass
        else:
            raise Exception("Unknown event:" + repr(event))
    logger.debug("End or parsing: Anchors:{0}".format(str(self.anchors)))
    # Adjust some environment variable, as they are relative to source file path
    self.adjustRelativePath(fileName)
def getPriority(self, action):
    """Base-class stub: any plugin taking part in 'action' must override this.

    Reaching this implementation is always an error and aborts through
    misc.ERROR with the offending plugin name and action.
    """
    message = "Plugin '{0}' is missing getPriority() method for action {1}"
    misc.ERROR(message.format(self.name, action))
def put(self, url):
    """Issue a bare WebHDFS PUT on 'url', expecting a 200 answer.

    Redirects are not followed; any status other than 200 aborts through
    misc.ERROR.
    """
    resp = requests.put(url, allow_redirects=False)
    logger.debug(url + " -> " + str(resp.status_code))
    if resp.status_code != 200:
        # Fix: pre-format the message with .format(), like every other WebHDFS
        # helper in this file — the original passed the args as extra positional
        # parameters, leaving '{0}'/'{1}' unexpanded unless misc.ERROR formats.
        misc.ERROR("Invalid returned http code '{0}' when calling '{1}'".format(resp.status_code, url))
def parseArg(mydir):
    """Parse the command line and return a populated Parameters holder.

    Applies defaults (nbrThreads=1, logging conf beside 'mydir'), forces
    'report' on when 'reportFiles' is set, and normalizes 'mode' to an
    octal string, aborting through misc.ERROR on invalid input.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--src', required=True)
    parser.add_argument('--dest', required=True)
    parser.add_argument('--checkMode', action='store_true')
    parser.add_argument('--report', action='store_true')
    parser.add_argument('--reportFiles', action='store_true')
    parser.add_argument('--nbrThreads', required=False)
    parser.add_argument('--yamlLoggingConf', help="Logging configuration as a yaml file")
    parser.add_argument('--force', action='store_true')
    parser.add_argument('--backup', action='store_true')
    parser.add_argument('--owner', required=False, help="owner for all files.")
    parser.add_argument('--group', required=False, help="group for all files.")
    parser.add_argument('--mode', required=False, help="mode for all files.")
    parser.add_argument('--directoryMode', required=False)
    parser.add_argument('--forceExt', action='store_true')
    parser.add_argument(
        '--hdfsUser', required=False, default="hdfs",
        help="Default: 'hdfs'. Set to 'KERBEROS' to use Kerberos authentication")
    parser.add_argument('--hadoopConfDir', required=False, default="/etc/hadoop/conf")
    parser.add_argument('--webhdfsEndpoint', required=False)
    params = parser.parse_args()
    p = Parameters()
    # Copy every CLI option onto the Parameters holder.
    for attr in ('src', 'dest', 'checkMode', 'report', 'reportFiles',
                 'nbrThreads', 'yamlLoggingConf', 'force', 'backup', 'owner',
                 'group', 'mode', 'directoryMode', 'forceExt', 'hdfsUser',
                 'hadoopConfDir', 'webhdfsEndpoint'):
        setattr(p, attr, getattr(params, attr))
    # Logging configuration: default file beside the tool, unless overridden.
    p.loggingConfFile = os.path.join(mydir, "./logging.yml")
    if p.yamlLoggingConf != None:
        p.loggingConfFile = p.yamlLoggingConf
        if not os.path.isfile(p.loggingConfFile):
            misc.ERROR("'{0}' is not a readable file!".format(p.loggingConfFile))
    p.nbrThreads = int(p.nbrThreads) if p.nbrThreads != None else 1
    if p.reportFiles:
        p.report = True
    # Some checks
    if p.mode != None:
        if not isinstance(p.mode, int):
            try:
                p.mode = int(p.mode, 8)
            except Exception:
                misc.ERROR("mode must be in octal form")
        p.mode = oct(p.mode)
    #print '{ mode_type: "' + str(type(p.mode)) + '", mode_value: "' + str(p.mode) + '"}'
    return p
def main():
    """Entry point: parse CLI args, load plugins, build and validate the model,
    then generate the Ansible artifacts (playbooks, inventory, config) for the
    requested action into the working folder.

    Special actions: "none" generates artifacts for every supported action;
    "dumpvars" prints the model variables (with their defining file) and exits.
    """
    mydir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser()
    parser.add_argument('--src', nargs='*', required=True)
    parser.add_argument('--action', required=True)
    parser.add_argument('--scope', nargs='*', required=False)
    parser.add_argument('--noScope', nargs='*', required=False)
    parser.add_argument('--yamlLoggingConf', help="Logging configuration as a yaml file", required=False)
    parser.add_argument('--workingFolder', help="Where to store working context", required=True)
    param = parser.parse_args()
    if param.yamlLoggingConf != None:
        loggingConfFile = param.yamlLoggingConf
    else:
        loggingConfFile = os.path.join(mydir, "conf/logging.yml")
    if not os.path.isfile(loggingConfFile):
        misc.ERROR("'{0}' is not a readable file!".format(loggingConfFile))
    # NOTE(review): yaml.load without an explicit Loader is unsafe on untrusted
    # input and the file handle is never closed — consider safe_load + 'with'.
    logging.config.dictConfig(yaml.load(open(loggingConfFile)))
    logger.debug("mydir:" + mydir)
    logger.debug("param.src:" + str(param.src))
    workingFolder = param.workingFolder
    if not os.path.isdir(workingFolder):
        misc.ERROR("{0} must be an existing folder".format(workingFolder))
    if len(os.listdir(workingFolder)) > 0:
        misc.ERROR("{0} must be an existing EMPTY folder".format(workingFolder))
    # ----- We must make a first read of the file, with only the 'master' plugin to fetch plugins list and path
    masterPluginPath = os.path.abspath(os.path.join(mydir, "."))
    context = Context(workingFolder)
    context.loadPlugin("master", [masterPluginPath])
    handleSourceFiles(param.src, context, None)
    context.groom()
    # --------------------------------------------- included scope handling
    # CLI scopes win; otherwise fall back to the scopes declared in the model.
    context.includedScopes = handleCliScopes(param.scope)
    if len(context.includedScopes) == 0 and INCLUDED_SCOPES in context.model[SRC]:
        context.includedScopes = set(context.model[SRC][INCLUDED_SCOPES])
    if len(context.includedScopes) > 0:
        print("Scope limited to {0}".format(str(list(context.includedScopes))))
    # -------------------------------------------- Excluded scope handling
    # CLI exclusions are merged with the ones declared in the model.
    context.excludedScopes = handleCliScopes(param.noScope)
    if EXCLUDED_SCOPES in context.model[SRC]:
        context.excludedScopes = context.excludedScopes.union(context.model[SRC][EXCLUDED_SCOPES])
    if len(context.excludedScopes) > 0:
        print("Scope excluded: {0}".format(str(list(context.excludedScopes))))
    # Now, we must have the effective PLUGINS list and PLUGINS_PATHS in the context. We can load all plugins
    for plName in context.model[SRC][PLUGINS]:
        context.loadPlugin(plName, context.model[SRC][PLUGINS_PATHS])
    # And reload source files, with now all plugins activated
    fileByVariable = {} if param.action == "dumpvars" else None
    handleSourceFiles(param.src, context, fileByVariable)
    if 'include' in context.model[SRC]:
        del(context.model[SRC]['include'])  # Must remove, as not part of the schema
    # Now, build the schema for source validation, by merge of all schema plugin
    theSchema = context.getSchema()
    dump.dumpSchema(context, theSchema)
    #dump.dumpModel(context)
    # And validate against this schema
    schema.validate(context.model[SRC], theSchema)
    # And groom all plugins
    context.groom()
    dump.dumpModel(context)
    # Check scopes validity
    # NB: We perform this after grooming, even if grooming can rely on scope. Aim is only to detect scopes with a typo.
    supportedScopes = context.getAllSupportedScopes()
    scopesToTest = context.excludedScopes.union(context.includedScopes)
    for scope in scopesToTest:
        if scope != "all" and scope != "none" and not context.checkScope(scope) and scope not in supportedScopes:
            # checkScope(): Scope for target file/folders (hosts and hostgroups)
            misc.ERROR("Scope '{0}' is not supported!".format(scope))
    templator = Templator([os.path.join(mydir, './templates'), context.workingFolder], context.model)
    actions = context.getAllSupportedActions()
    logger.debug("Supported actions: {0}".format(actions))
    action = param.action
    if action == "none":
        # Generate the playbook of every supported action.
        for action in actions:
            pluginExts = context.getPluginExtForAction(action)
            logger.debug("Action: {0} -> plugins: {1}".format(action, pluginExts))
            context.buildTemplate(action, pluginExts)
            context.builRolesPath(action, pluginExts)
            context.generateAuxTemplates(action, pluginExts)
            templator.generate("{0}.yml.jj2".format(action), os.path.join(context.workingFolder, "{0}.yml".format(action)))
    elif action == "dumpvars":
        # Print every model variable as 'name: value (source file)' and exit.
        if SRC in context.model and VARS in context.model[SRC]:
            print("---")
            variables = context.model[SRC][VARS]
            for name in sorted(variables):
                x = yaml.dump(variables[name], default_flow_style=True, default_style=None, explicit_end=False)
                # Strip the trailing document-end marker and newline yaml.dump appends.
                p = x.find("\n...\n")
                if p > 0:
                    x = x[:-5]
                p = x.find("\n")
                if p > 0:
                    x = x[:-1]
                print("{}: {} ({})".format(name, x, fileByVariable[name] if name in fileByVariable else "??"))
            print("---")
        #txt = yaml.dump(context.model[SRC][VARS], default_flow_style=False, default_style=None)
        return
    else:
        if not action in actions:
            misc.ERROR("Action {0} not supported. Current configuration only supports {1}".format(action, str(actions)))
        pluginExts = context.getPluginExtForAction(action)
        logger.debug("Action: {0} -> plugins: {1}".format(action, pluginExts))
        context.buildTemplate(action, pluginExts)
        context.builRolesPath(action, pluginExts)
        context.generateAuxTemplates(action, pluginExts)
        templator.generate("{0}.yml.jj2".format(action), os.path.join(context.workingFolder, "{0}.yml".format(action)))
        templator.generate("inventory.jj2", os.path.join(context.workingFolder, "inventory"))
        templator.generate("ansible.cfg.jj2", os.path.join(context.workingFolder, "ansible.cfg"))
        misc.ensureFolder(os.path.join(context.workingFolder, "group_vars"))
        templator.generate("group_vars_all.jj2", os.path.join(context.workingFolder, "group_vars/all"))