class parseApp(object): ''' classdocs ''' def __init__(self): ''' Constructor ''' self.files = {} self.allocDirectories = [] self.directories = set() self.steps = [] self.services = [] self.notifications = [] self.log = Logger('log.txt','2','workflow',True).getlog() def delete_HDFS_dir(self, impl, hdfs_dir): cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir) cmdStr += "if [ $? -ne 0 ]; then\n" cmdStr += " %s %s\n" % (impl.fs_cmd.delete, hdfs_dir) cmdStr += "fi\n" return cmdStr def splitext(self, p): if not os.path.exists(p): return False else: return os.path.splitext(os.path.basename(p)) def mkdir(self, *args): p = _join_abs_file_name(*args) self.directories.add(p) return p def mkfile(self, name, data): if not os.path.isabs(name): raise RuntimeError('not an absolute path: %s' % name) name = os.path.normpath(name) self.files[name] = data return name def paramRectify(self,paramStr,paramElem,mustBe=True): if mustBe: if paramStr.find(paramElem) == -1: paramStr += " %s " % paramElem elif paramStr.find(paramElem) != -1: return paramStr.replace('-I', '') return paramStr def fileAppend(self,fh,commands,JobParamList={}): t = _generate_template(commands) for param in JobParamList: print >>fh, t.safe_substitute(param) def write_file(self,fileName,scriptsdir,commands,JobParamList=None,paramDict={},addShellHeader=False): scriptDict = bundle() scriptDict.script = [] t = _generate_template(commands) m = re.match('.*\$\{(\S+)\}.*',fileName) if JobParamList and m: for d in JobParamList: if not d.has_key(m.group(1)): self.log.error("Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % m.group(1)) if paramDict: d.update(paramDict) file_name = _generate_template(fileName).safe_substitute(d) scriptFile = os.path.join(scriptsdir,file_name) scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') print >>script, t.safe_substitute(d) else: scriptFile = os.path.join(scriptsdir,fileName) scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') if addShellHeader: print >>script, '#!/bin/bash' print >>script, "echo ==========start %s at : `date` ==========" % os.path.splitext(fileName)[0] _script_append(script, t, JobParamList, paramDict) print >>script, "echo ==========end %s at : `date` ========== " % os.path.splitext(fileName)[0] else: _script_append(script, t, JobParamList, paramDict) script.close() return scriptDict def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}): scriptDict = bundle() t = _generate_template(commands) scriptFile = os.path.join(scriptsdir,name+'.sh') script = open(scriptFile, 'w') print >>script, '#!/bin/bash' print >>script, "echo ==========start %s at : `date` ==========" % name _script_append(script, t, JobParamList, paramDict) print >>script, "" print >>script, "echo ==========end %s at : `date` ========== " % name script.close() return scriptFile def write_scripts(self, name, commands, JobParamList=[], paramDict={}): scriptDict = bundle() t = _generate_template(commands) for d in JobParamList: scriptsdir = d.get('SCRDIR') sampleName = d.get('SAMPLE') if not scriptsdir or not sampleName: self.log.error("Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." % name) exit(1) if paramDict: d.update(paramDict) scriptDict[sampleName] = os.path.join(scriptsdir,name+'.sh') script = open(scriptDict[sampleName], 'w') print >>script, '#!/bin/bash' print >>script, "echo ==========start %s at : `date` ==========" % name _script_append(script, t, paramDict=d) print >>script, "" print >>script, "echo ==========end %s at : `date` ========== " % name script.close() return scriptDict def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True): scriptDict = bundle() t = _generate_template(commands) scriptDict["script"] = [] for n, d in enumerate(JobParamList): if paramDict: d.update(paramDict) dataTag = str(n) if d.get('DATATAG'): dataTag = d.get('DATATAG') scriptFile = os.path.join(scriptsdir,name+'_'+ dataTag +'.sh') scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') if reducer: print >>script, t.safe_substitute(d) else: print >>script, '#!/bin/bash' print >>script, "echo ==========start %s at : `date` ========== %s" % name print >>script, t.safe_substitute(d) # print >>script, "\n" print >>script, "echo ==========end %s at : `date` ========== %s" % name script.close() return scriptDict
class impl(object): ''' classdocs ''' def __init__(self): ''' Constructor ''' self.files = {} self.allocDirectories = [] self.directories = set() self.steps = [] self.services = [] self.notifications = [] self.log = Logger('log.txt', '2', 'workflow', True).getlog() # def delete_HDFS_dir(self, impl, hdfs_dir): # cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir) # cmdStr += "if [ $? -ne 0 ]; then\n" # cmdStr += " %s %s\n" % (impl.fs_cmd.delete, hdfs_dir) # cmdStr += "fi\n" # return cmdStr def splitext(self, p): if not os.path.exists(p): return False else: return os.path.splitext(os.path.basename(p)) def expath(self, path, name, chechExists=''): if not name: if chechExists: raise RuntimeError('Program is not exists: %s' % chechExists) else: return False if os.path.exists(name): return name else: for p in path.split(':'): if os.path.exists(os.path.join(p, name)): return os.path.join(p, name) if chechExists: raise RuntimeError('Program is not exists: %s' % chechExists) return False def mkdir(self, *args): p = _join_abs_file_name(*args) self.directories.add(p) return p def mkfile(self, name, data): if not os.path.isabs(name): raise RuntimeError('not an absolute path: %s' % name) name = os.path.normpath(name) self.files[name] = data return name def paramRectify(self, paramStr, paramElem, mustBe=True): if mustBe: if paramStr.find(paramElem) == -1: paramStr += " %s " % paramElem elif paramStr.find(paramElem) != -1: return paramStr.replace(paramElem, '') return paramStr def paramCheck(self, mustBe, paramStr, paramElem, paramValue=None): if mustBe: if paramStr.find(paramElem) == -1: if paramValue is not None: paramStr += " %s %s " % (paramElem, paramValue) else: paramStr += " %s " % paramElem elif paramValue is not None: m = re.match('.*%s\s+(\w+).*' % paramElem, paramStr) if m and m.group(1) != paramValue: strinfo = re.compile('%s\s+\w+' % paramElem) newParamStr = strinfo.sub( '%s %s' % (paramElem, paramValue), paramStr) self.log.warning("Rectify parameter '%s' to '%s'" % (paramStr, newParamStr)) return newParamStr elif paramStr.find(paramElem) != -1: return paramStr.replace(paramElem, '') return paramStr def hasParam(self, paramStr, paramElem): if paramStr.find(paramElem) != -1: return True return False def fileAppend(self, fh, commands, JobParamList={}): t = _generate_template(commands) for param in JobParamList: print >> fh, t.safe_substitute(param) def write_file(self, fileName, scriptsdir, commands, JobParamList=None, paramDict={}, addShellHeader=False): scriptDict = bundle() scriptDict.script = [] t = _generate_template(commands) m = re.match('.*\$\{(\S+)\}.*', fileName) if JobParamList and m: for d in JobParamList: if not d.has_key(m.group(1)): self.log.error( "Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % m.group(1)) if paramDict: d.update(paramDict) file_name = _generate_template(fileName).safe_substitute(d) scriptFile = os.path.join(scriptsdir, file_name) scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') print >> script, t.safe_substitute(d) else: scriptFile = os.path.join(scriptsdir, fileName) scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') if addShellHeader: print >> script, '#!/bin/bash' print >> script, "echo ==========start %s at : `date` ==========" % os.path.splitext( fileName)[0] _script_append(script, t, JobParamList, paramDict) print >> script, "echo ==========end %s at : `date` ========== " % os.path.splitext( fileName)[0] else: _script_append(script, t, JobParamList, paramDict) script.close() return scriptDict def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}): t = _generate_template(commands) scriptFile = os.path.join(scriptsdir, name + '.sh') script = open(scriptFile, 'w') print >> script, '#!/bin/bash' print >> script, "echo ==========start %s at : `date` ==========" % name _script_append(script, t, JobParamList, paramDict) print >> script, "" print >> script, "echo ==========end %s at : `date` ========== " % name script.close() return scriptFile def write_scripts(self, name, commands, JobParamList=[], paramDict={}): scriptDict = bundle() t = _generate_template(commands) if paramDict: t = Template(t.safe_substitute(paramDict)) for d in JobParamList: scriptsdir = d.get('SCRDIR') sampleName = d.get('SAMPLE') if not scriptsdir or not sampleName: self.log.error( "Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." % name) exit(1) scriptDict[sampleName] = os.path.join(scriptsdir, name + '.sh') script = open(scriptDict[sampleName], 'w') print >> script, '#!/bin/bash' print >> script, "echo ==========start %s at : `date` ==========" % name _script_append(script, t, paramDict=d) print >> script, "" print >> script, "echo ==========end %s at : `date` ========== " % name script.close() return scriptDict def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True): scriptDict = bundle() t = _generate_template(commands) scriptDict["script"] = [] for n, d in enumerate(JobParamList): if paramDict: d.update(paramDict) dataTag = str(n) if d.get('DATATAG'): dataTag = d.get('DATATAG') scriptFile = os.path.join(scriptsdir, name + '_' + dataTag + '.sh') scriptDict["script"].append(scriptFile) script = open(scriptFile, 'w') if reducer: print >> script, t.safe_substitute(d) else: print >> script, '#!/bin/bash' print >> script, "echo ==========start %s at : `date` ========== %s" % name print >> script, t.safe_substitute(d) # print >>script, "\n" print >> script, "echo ==========end %s at : `date` ========== %s" % name script.close() return scriptDict
# try: # configInfo = bundle(clean(json.loads(data))) # except Exception,e: # print Exception,"%s, "%self.config,e # configInfo.rupdate(userConf) if not configInfo.has_key('Path'): configInfo.Path = bundle() if not configInfo.has_key('self_defined'): configInfo.self_defined = bundle() necessary_property = ( "analysis_flow", "hadoop", "file", "ref") for np in necessary_property: if not configInfo.has_key(np): logger.error("config property: %s is not set" % np) exit(2) if configInfo.hadoop.get('is_at_TH'): if isinstance(configInfo.hadoop.is_at_TH,str): if configInfo.hadoop.is_at_TH == 'true' or configInfo.hadoop.is_at_TH == 'True': configInfo.hadoop.is_at_TH = True else: configInfo.hadoop.is_at_TH = False if configInfo.hadoop.has_key('root'): configInfo.hadoop.bin = os.path.join(configInfo.hadoop.root,'bin/hadoop') configInfo.hadoop.streamingJar = os.path.join(configInfo.hadoop.root,'contrib/streaming/hadoop-streaming-0.20.2-cdh3u6.jar') if not os.path.exists(configInfo.hadoop.bin): logger.error("hadoop path: %s is not exists" % configInfo.hadoop.bin) if not os.path.exists(configInfo.hadoop.streamingJar):