Example #1
0
class parseApp(object):
    '''
    classdocs
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps = []
        self.services = []
        self.notifications = []
        self.log = Logger('log.txt','2','workflow',True).getlog()
    
    
    def delete_HDFS_dir(self, impl, hdfs_dir):
        cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
        cmdStr += "if [ $? -ne 0 ]; then\n"
        cmdStr += "    %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
        cmdStr += "fi\n"
        return cmdStr
    
    def splitext(self, p):
        if not os.path.exists(p):
            return False
        else:
            return os.path.splitext(os.path.basename(p))
    
    def mkdir(self, *args):
        p = _join_abs_file_name(*args)
        self.directories.add(p)
        return p
    
    def mkfile(self, name, data):
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name 
    
    def paramRectify(self,paramStr,paramElem,mustBe=True):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
                return paramStr.replace('-I', '')
        return paramStr
            
    def fileAppend(self,fh,commands,JobParamList={}):
        t = _generate_template(commands)
        for param in JobParamList:
            print >>fh, t.safe_substitute(param)
    
    def write_file(self,fileName,scriptsdir,commands,JobParamList=None,paramDict={},addShellHeader=False):
        scriptDict = bundle()
        scriptDict.script = []
        
        t = _generate_template(commands)
        m = re.match('.*\$\{(\S+)\}.*',fileName)
        
        if JobParamList and m:
            for d in JobParamList:
                if not d.has_key(m.group(1)):
                    self.log.error("Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % m.group(1))
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir,file_name)
                scriptDict["script"].append(scriptFile)
                script = open(scriptFile, 'w')
                print >>script, t.safe_substitute(d)
        else:
            scriptFile = os.path.join(scriptsdir,fileName)
            scriptDict["script"].append(scriptFile)
            script = open(scriptFile, 'w')
            if addShellHeader:
                print >>script, '#!/bin/bash'
                print >>script, "echo ==========start %s at : `date` ==========" % os.path.splitext(fileName)[0] 
                _script_append(script, t, JobParamList, paramDict)
                print >>script, "echo ==========end %s at : `date` ========== " % os.path.splitext(fileName)[0] 
            else:   
                _script_append(script, t, JobParamList, paramDict)
            script.close()
        return scriptDict
        
    def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}):
        scriptDict = bundle()
        t = _generate_template(commands)
        
        scriptFile = os.path.join(scriptsdir,name+'.sh')   
        script = open(scriptFile, 'w')
        print >>script, '#!/bin/bash'
        print >>script, "echo ==========start %s at : `date` ==========" % name
        _script_append(script, t, JobParamList, paramDict)
        print >>script, ""  
        print >>script, "echo ==========end %s at : `date` ========== " % name
        script.close()
        return scriptFile
    
    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        scriptDict = bundle()
        t = _generate_template(commands)
        
        
        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')
            
            if not scriptsdir or not sampleName:
                self.log.error("Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." % name) 
                exit(1)
            
            if paramDict:
                d.update(paramDict)
                
            scriptDict[sampleName] = os.path.join(scriptsdir,name+'.sh')   
            script = open(scriptDict[sampleName], 'w')
            
            print >>script, '#!/bin/bash'
            print >>script, "echo ==========start %s at : `date` ==========" % name
            _script_append(script, t, paramDict=d)
            print >>script, ""  
            print >>script, "echo ==========end %s at : `date` ========== " % name
            script.close()
            
        return scriptDict
            
    def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True):
        scriptDict = bundle()
        t = _generate_template(commands)
        
        scriptDict["script"] = []
        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            dataTag = str(n)
            if d.get('DATATAG'):
                dataTag = d.get('DATATAG')
            scriptFile = os.path.join(scriptsdir,name+'_'+  dataTag  +'.sh')
            scriptDict["script"].append(scriptFile)
            script = open(scriptFile, 'w')
            if reducer:
                print >>script, t.safe_substitute(d)
            else:
                print >>script, '#!/bin/bash'
                print >>script, "echo ==========start %s at : `date` ========== %s" % name
                print >>script, t.safe_substitute(d)
    #                 print >>script, "\n"          
                print >>script, "echo ==========end %s at : `date` ========== %s" % name
            script.close()
                
        return scriptDict
Example #2
0
class impl(object):
    '''
    classdocs
    '''
    def __init__(self):
        '''
        Constructor
        '''
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps = []
        self.services = []
        self.notifications = []
        self.log = Logger('log.txt', '2', 'workflow', True).getlog()

#     def delete_HDFS_dir(self, impl, hdfs_dir):
#         cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
#         cmdStr += "if [ $? -ne 0 ]; then\n"
#         cmdStr += "    %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
#         cmdStr += "fi\n"
#         return cmdStr

    def splitext(self, p):
        if not os.path.exists(p):
            return False
        else:
            return os.path.splitext(os.path.basename(p))

    def expath(self, path, name, chechExists=''):
        if not name:
            if chechExists:
                raise RuntimeError('Program is not exists: %s' % chechExists)
            else:
                return False

        if os.path.exists(name):
            return name
        else:
            for p in path.split(':'):
                if os.path.exists(os.path.join(p, name)):
                    return os.path.join(p, name)
        if chechExists:
            raise RuntimeError('Program is not exists: %s' % chechExists)
        return False

    def mkdir(self, *args):
        p = _join_abs_file_name(*args)
        self.directories.add(p)
        return p

    def mkfile(self, name, data):
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name

    def paramRectify(self, paramStr, paramElem, mustBe=True):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def paramCheck(self, mustBe, paramStr, paramElem, paramValue=None):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                if paramValue is not None:
                    paramStr += " %s %s " % (paramElem, paramValue)
                else:
                    paramStr += " %s " % paramElem
            elif paramValue is not None:
                m = re.match('.*%s\s+(\w+).*' % paramElem, paramStr)
                if m and m.group(1) != paramValue:
                    strinfo = re.compile('%s\s+\w+' % paramElem)
                    newParamStr = strinfo.sub(
                        '%s %s' % (paramElem, paramValue), paramStr)
                    self.log.warning("Rectify parameter '%s' to '%s'" %
                                     (paramStr, newParamStr))
                    return newParamStr
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def hasParam(self, paramStr, paramElem):
        if paramStr.find(paramElem) != -1:
            return True
        return False

    def fileAppend(self, fh, commands, JobParamList={}):
        t = _generate_template(commands)
        for param in JobParamList:
            print >> fh, t.safe_substitute(param)

    def write_file(self,
                   fileName,
                   scriptsdir,
                   commands,
                   JobParamList=None,
                   paramDict={},
                   addShellHeader=False):
        scriptDict = bundle()
        scriptDict.script = []

        t = _generate_template(commands)
        m = re.match('.*\$\{(\S+)\}.*', fileName)

        if JobParamList and m:
            for d in JobParamList:
                if not d.has_key(m.group(1)):
                    self.log.error(
                        "Wrong about impl.write_file paramter: fileName. No %s in JobParamList."
                        % m.group(1))
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir, file_name)
                scriptDict["script"].append(scriptFile)
                script = open(scriptFile, 'w')
                print >> script, t.safe_substitute(d)
        else:
            scriptFile = os.path.join(scriptsdir, fileName)
            scriptDict["script"].append(scriptFile)
            script = open(scriptFile, 'w')
            if addShellHeader:
                print >> script, '#!/bin/bash'
                print >> script, "echo ==========start %s at : `date` ==========" % os.path.splitext(
                    fileName)[0]
                _script_append(script, t, JobParamList, paramDict)
                print >> script, "echo ==========end %s at : `date` ========== " % os.path.splitext(
                    fileName)[0]
            else:
                _script_append(script, t, JobParamList, paramDict)
            script.close()
        return scriptDict

    def write_shell(self,
                    name,
                    scriptsdir,
                    commands,
                    JobParamList=[],
                    paramDict={}):
        t = _generate_template(commands)

        scriptFile = os.path.join(scriptsdir, name + '.sh')
        script = open(scriptFile, 'w')
        print >> script, '#!/bin/bash'
        print >> script, "echo ==========start %s at : `date` ==========" % name
        _script_append(script, t, JobParamList, paramDict)
        print >> script, ""
        print >> script, "echo ==========end %s at : `date` ========== " % name
        script.close()
        return scriptFile

    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        scriptDict = bundle()
        t = _generate_template(commands)
        if paramDict:
            t = Template(t.safe_substitute(paramDict))

        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')

            if not scriptsdir or not sampleName:
                self.log.error(
                    "Error in step (%s) JobParamList(no SMAPLE or SCRDIR)." %
                    name)
                exit(1)

            scriptDict[sampleName] = os.path.join(scriptsdir, name + '.sh')
            script = open(scriptDict[sampleName], 'w')

            print >> script, '#!/bin/bash'
            print >> script, "echo ==========start %s at : `date` ==========" % name
            _script_append(script, t, paramDict=d)
            print >> script, ""
            print >> script, "echo ==========end %s at : `date` ========== " % name
            script.close()

        return scriptDict

    def write_Scripts(self,
                      name,
                      scriptsdir,
                      commands,
                      JobParamList=[],
                      paramDict={},
                      reducer=True):
        scriptDict = bundle()
        t = _generate_template(commands)

        scriptDict["script"] = []
        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            dataTag = str(n)
            if d.get('DATATAG'):
                dataTag = d.get('DATATAG')
            scriptFile = os.path.join(scriptsdir, name + '_' + dataTag + '.sh')
            scriptDict["script"].append(scriptFile)
            script = open(scriptFile, 'w')
            if reducer:
                print >> script, t.safe_substitute(d)
            else:
                print >> script, '#!/bin/bash'
                print >> script, "echo ==========start %s at : `date` ========== %s" % name
                print >> script, t.safe_substitute(d)
                #                 print >>script, "\n"
                print >> script, "echo ==========end %s at : `date` ========== %s" % name
            script.close()

        return scriptDict
Example #3
0
 # try:
 #     configInfo = bundle(clean(json.loads(data)))
 # except Exception,e:
 #     print Exception,"%s, "%self.config,e
 # configInfo.rupdate(userConf)
 
 if not configInfo.has_key('Path'):
     configInfo.Path = bundle()
 if not configInfo.has_key('self_defined'):
     configInfo.self_defined = bundle()
 
 necessary_property = ( "analysis_flow", "hadoop", "file", "ref")
 
 for np in necessary_property:
     if not configInfo.has_key(np):
         logger.error("config property: %s is not set" % np)
         exit(2)
         
 if configInfo.hadoop.get('is_at_TH'):
     if isinstance(configInfo.hadoop.is_at_TH,str):
         if configInfo.hadoop.is_at_TH == 'true' or configInfo.hadoop.is_at_TH == 'True':
             configInfo.hadoop.is_at_TH = True
         else:
             configInfo.hadoop.is_at_TH = False
 
 if  configInfo.hadoop.has_key('root'):
     configInfo.hadoop.bin = os.path.join(configInfo.hadoop.root,'bin/hadoop')
     configInfo.hadoop.streamingJar = os.path.join(configInfo.hadoop.root,'contrib/streaming/hadoop-streaming-0.20.2-cdh3u6.jar')
 if not os.path.exists(configInfo.hadoop.bin):
     logger.error("hadoop path: %s is not exists" % configInfo.hadoop.bin)
 if not os.path.exists(configInfo.hadoop.streamingJar):