Exemple #1
0
 def __init__(self):
     '''
     Create the empty bookkeeping containers and the workflow logger.
     '''
     # Containers start empty; nothing in this method fills them.
     self.files = {}
     self.allocDirectories = []
     self.directories = set()
     self.steps, self.services, self.notifications = [], [], []
     # Logger is a project helper; its argument meanings are defined there.
     self.log = Logger('log.txt', '2', 'workflow', True).getlog()
def run(args, state):
    '''
    Run the requested steps of one sample, in order, as local shell scripts.

    args  -- object providing `sampleName` and `jobs` (comma-separated
             step names).
    state -- object providing `analysisDict`, `analysisList`, `results`
             and `scriptsDir`.

    Steps whose platform is 'S' are skipped.  After the first listed step,
    a step is not started when one of its dependencies is not marked
    complete.  Every other step's script is started with `sh` and
    check_log() (defined elsewhere) decides whether it succeeded; the
    'alignment' step is always treated as complete.  On success the
    script's stdout is saved to '<script>.o'.

    Returns True when no started step failed, otherwise False.
    '''
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1',
                    'gaeaJobMonitor', False).getlog()
    isComplete = bundle()

    all_done = True

    jobList = args.jobs.split(',')

    # Drop a leading 'init' step when no script was recorded for it.
    if jobList[0] == 'init' and not state.results['init'].get('script'):
        jobList = jobList[1:]

    for num, step in enumerate(jobList):
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the
        # analysis list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break
        # `==` (not `is`) is kept so falsy-but-equal values still count.
        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        isComplete[step] = check_log(p, script, sampleName, n, step)
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # `with` guarantees the .o file is flushed and closed.
            with open(script + '.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    # Strip only a real trailing newline, so a final line
                    # without one does not lose its last character.
                    print >> out_fh, line.rstrip('\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            # Kill the child only if it has not been reaped yet.
            if p.returncode is None:
                p.kill()

    return all_done
Exemple #3
0
def main():
    '''
    Command-line entry point: load a state file, apply a rerun file and
    start the scheduler.

    Returns 1 when the --state or --rerun path does not exist, and 0 after
    the scheduler has been started and the 'success' marker file has been
    written into state.stateDir.  Exits with status 1 (after printing the
    help text) when called without any argument.
    '''
    program_name = os.path.basename(sys.argv[0])
    program_license = '''{0}
      Created by huangzhibo on {1}.
      Last updated on {2}.
      Copyright 2017 BGI bigData. All rights reserved.
    USAGE'''.format(" v".join([program_name, __version__]), str(__date__),
                    str(__updated__))

    parser = ArgumentParser(description=program_license,
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-s",
                        "--state",
                        dest="state",
                        help="state file,[default: %(default)s]",
                        required=True)
    parser.add_argument("-r",
                        "--rerun",
                        dest="rerun",
                        help="rerun file,[default: %(default)s]",
                        required=True)

    if len(sys.argv) == 1:
        parser.print_help()
        # sys.exit instead of the interactive-only `exit` helper.
        sys.exit(1)

    # Process arguments
    args = parser.parse_args()
    if not os.path.exists(args.state):
        printtime('ERROR: (--state: %s) - No such file or directory' %
                  args.state)
        return 1
    if not os.path.exists(args.rerun):
        # Report the option and path that are actually missing.
        printtime('ERROR: (--rerun: %s) - No such file or directory' %
                  args.rerun)
        return 1

    state = ParseConfig(args.state).parseState()
    # Force exclusive_task to the string 'False' on these steps when present
    # ('init' is set unconditionally).
    if 'bamSort' in state:
        state.bamSort.exclusive_task = 'False'
    if 'bamSort_M' in state:
        state.bamSort_M.exclusive_task = 'False'
    state.init.exclusive_task = 'False'
    if 'bamindex' in state:
        state.bamindex.exclusive_task = 'False'

    logger = Logger(os.path.join(state.scriptsDir, 'log'), '1',
                    'job_scheduler', False).getlog()
    state.logger = logger
    sched = Scheduler(state)
    sched.parse_rerun(args.rerun)
    sched.start()

    # Marker file written once sched.start() has returned.
    with open(os.path.join(state.stateDir, 'success'), 'w') as f:
        f.write('done!')

    return 0
Exemple #4
0
 def __init__(self):
     '''
     Create the empty bookkeeping containers and the workflow logger.
     '''
     # Containers start empty; nothing in this method fills them.
     self.files = {}
     self.allocDirectories = []
     self.directories = set()
     self.steps, self.services, self.notifications = [], [], []
     # Logger is a project helper; its argument meanings are defined there.
     self.log = Logger('log.txt', '2', 'workflow', True).getlog()
def run(args,state):
    '''
    Run the requested steps of one sample, in order, as local shell scripts.

    args  -- object providing `sampleName` and `jobs` (comma-separated
             step names).
    state -- object providing `analysisDict`, `analysisList`, `results`
             and `scriptsDir`.

    Steps whose platform is 'S' are skipped.  After the first listed step,
    a step is not started when one of its dependencies is not marked
    complete.  Every other step's script is started with `sh` and
    check_log() (defined elsewhere) decides whether it succeeded; the
    'alignment' step is always treated as complete.  On success the
    script's stdout is saved to '<script>.o'.

    Returns True when no started step failed, otherwise False.
    '''
    analysisDict = state.analysisDict
    sampleName = args.sampleName
    logger = Logger(os.path.join(state.scriptsDir,'log'),'1','gaeaJobMonitor',False).getlog()
    isComplete = bundle()

    all_done = True

    jobList = args.jobs.split(',')

    # Drop a leading 'init' step when no script was recorded for it.
    if jobList[0] == 'init' and not state.results['init'].get('script'):
        jobList = jobList[1:]

    for num,step in enumerate(jobList):
        if analysisDict[step].platform == 'S':
            continue

        # Step number used in log messages; shifted by one when the
        # analysis list does not start with 'init'.
        n = state.analysisList.index(step)
        if state.analysisList[0] != 'init':
            n += 1

        script = state.results[step]['script'][sampleName]
        if num > 0:
            for depStep in analysisDict[step].depend:
                if not isComplete[depStep]:
                    isComplete[step] = False
                    break
        # `==` (not `is`) is kept so falsy-but-equal values still count.
        if step in isComplete and isComplete[step] == False:
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            continue

        printtime('step: %s start...' % step)
        p = subprocess.Popen('sh %s' % script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        isComplete[step] = check_log(p,script,sampleName,n, step)
        if isComplete[step] or step == 'alignment':
            if step == 'alignment':
                isComplete[step] = True
            printtime("step: %s complete" % step)
            logger.info('%s - step %d: %s complete' % (sampleName, n, step))
            # `with` guarantees the .o file is flushed and closed.
            with open(script+'.o', 'w') as out_fh:
                for line in p.stdout.readlines():
                    # Strip only a real trailing newline, so a final line
                    # without one does not lose its last character.
                    print >>out_fh, line.rstrip('\n')
            p.wait()
        else:
            all_done = False
            printtime("%s failed" % step)
            logger.warning('%s - step %d: %s failed' % (sampleName, n, step))
            # Kill the child only if it has not been reaped yet.
            if p.returncode is None:
                p.kill()

    return all_done
Exemple #6
0
from gaeautils import bundle, Logger
import glob
import os


# Module-level logger used by parse_sample() below; built by the project's
# Logger helper (argument meanings are defined there).
logger = Logger('log.txt','2',"mode6",True).getlog()
def parse_sample(sampleList):
    '''
    Read a whitespace-separated sample list file line by line.

    For every line the read-group fields, the sample name and the list of
    first-read FASTQ files (fq1s) are determined; an error is logged when
    no fq1 file is found or the first one does not exist.

    NOTE(review): only the beginning of this function is visible here; what
    is stored in `sampleInfo`, how `total_number` is used and what is
    returned cannot be told from this excerpt.
    '''
    total_number = 0
    sampleInfo = bundle()
    with open(sampleList,'r') as f:
        for line in f:
            fq1s = []
            line = line.strip()
            field = line.split()
            # Default column layout: name in column 2, LB in 3, PU in 4.
            rg_LB = field[2]
            rg_PU = field[3]
            sampleName = field[1]

            if field[3].find(',') != -1:
                # Fourth column holds a comma-separated list: its first
                # item is taken as fq1 and the other columns shift left.
                fq1s.append(field[3].split(',')[0])
                rg_LB = field[1]
                rg_PU = field[2]
                sampleName = field[0]
            else:
                # Last column is a directory; look for *1.fq.gz directly
                # in it, then one level deeper.
                fq_dir = field[-1].strip()
                fq1s = glob.glob("%s/*1.fq.gz" % fq_dir)
                if not fq1s:
                    fq1s = glob.glob("%s/*/*1.fq.gz" % fq_dir)

            if len(fq1s) == 0 or not os.path.exists(fq1s[0]):
                logger.error("fq1 under %s don't exists." % sampleName)
Exemple #7
0
class impl(object):
    '''
    classdocs
    '''
    def __init__(self):
        '''
        Create the empty bookkeeping containers and the workflow logger.
        '''
        # mkfile() stores into `files`, mkdir() adds to `directories`.
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps, self.services, self.notifications = [], [], []
        # Used by the methods below for warnings and errors.
        self.log = Logger('log.txt', '2', 'workflow', True).getlog()

#     def delete_HDFS_dir(self, impl, hdfs_dir):
#         cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
#         cmdStr += "if [ $? -ne 0 ]; then\n"
#         cmdStr += "    %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
#         cmdStr += "fi\n"
#         return cmdStr

    def splitext(self, p):
        if not os.path.exists(p):
            return False
        else:
            return os.path.splitext(os.path.basename(p))

    def expath(self, path, name, chechExists=''):
        if not name:
            if chechExists:
                raise RuntimeError('Program is not exists: %s' % chechExists)
            else:
                return False

        if os.path.exists(name):
            return name
        else:
            for p in path.split(':'):
                if os.path.exists(os.path.join(p, name)):
                    return os.path.join(p, name)
        if chechExists:
            raise RuntimeError('Program is not exists: %s' % chechExists)
        return False

    def mkdir(self, *args):
        '''
        Build a path from `args` with _join_abs_file_name (defined
        elsewhere), record it in self.directories and return it.
        '''
        directory = _join_abs_file_name(*args)
        self.directories.add(directory)
        return directory

    def mkfile(self, name, data):
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name

    def paramRectify(self, paramStr, paramElem, mustBe=True):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def paramCheck(self, mustBe, paramStr, paramElem, paramValue=None):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                if paramValue is not None:
                    paramStr += " %s %s " % (paramElem, paramValue)
                else:
                    paramStr += " %s " % paramElem
            elif paramValue is not None:
                m = re.match('.*%s\s+(\w+).*' % paramElem, paramStr)
                if m and m.group(1) != paramValue:
                    strinfo = re.compile('%s\s+\w+' % paramElem)
                    newParamStr = strinfo.sub(
                        '%s %s' % (paramElem, paramValue), paramStr)
                    self.log.warning("Rectify parameter '%s' to '%s'" %
                                     (paramStr, newParamStr))
                    return newParamStr
        elif paramStr.find(paramElem) != -1:
            return paramStr.replace(paramElem, '')
        return paramStr

    def hasParam(self, paramStr, paramElem):
        if paramStr.find(paramElem) != -1:
            return True
        return False

    def fileAppend(self, fh, commands, JobParamList={}):
        '''
        Render `commands` once per entry of `JobParamList` and print each
        rendering to the open file handle `fh`.

        The template comes from _generate_template (defined elsewhere);
        placeholders without a value are left as they are
        (safe_substitute).
        '''
        template = _generate_template(commands)
        for job_params in JobParamList:
            rendered = template.safe_substitute(job_params)
            print >> fh, rendered

    def write_file(self,
                   fileName,
                   scriptsdir,
                   commands,
                   JobParamList=None,
                   paramDict={},
                   addShellHeader=False):
        '''
        Write `commands` into one or several files below `scriptsdir`.

        When `fileName` contains a ${KEY} placeholder and `JobParamList`
        is non-empty, one file is written per job dict: the dict is
        updated with `paramDict`, the file name and the commands are
        rendered from it, and an error is logged when the dict lacks KEY.
        Otherwise a single file named `fileName` is written through
        _script_append (defined elsewhere), optionally wrapped in a bash
        header and start/end echo lines.

        Returns a bundle whose "script" entry lists the written paths.
        Note that the job dicts in `JobParamList` are updated in place.
        '''
        scriptDict = bundle()
        scriptDict.script = []

        t = _generate_template(commands)
        m = re.match(r'.*\$\{(\S+)\}.*', fileName)

        if JobParamList and m:
            for d in JobParamList:
                if m.group(1) not in d:
                    self.log.error(
                        "Wrong about impl.write_file paramter: fileName. No %s in JobParamList."
                        % m.group(1))
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir, file_name)
                scriptDict["script"].append(scriptFile)
                # Each per-job file is closed as soon as it is written.
                with open(scriptFile, 'w') as script:
                    print >> script, t.safe_substitute(d)
        else:
            scriptFile = os.path.join(scriptsdir, fileName)
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if addShellHeader:
                    title = os.path.splitext(fileName)[0]
                    print >> script, '#!/bin/bash'
                    print >> script, "echo ==========start %s at : `date` ==========" % title
                    _script_append(script, t, JobParamList, paramDict)
                    print >> script, "echo ==========end %s at : `date` ========== " % title
                else:
                    _script_append(script, t, JobParamList, paramDict)
        return scriptDict

    def write_shell(self,
                    name,
                    scriptsdir,
                    commands,
                    JobParamList=[],
                    paramDict={}):
        '''
        Write '<scriptsdir>/<name>.sh': a bash header, a start echo line,
        the body produced by _script_append (defined elsewhere) from
        `commands`, `JobParamList` and `paramDict`, an empty line and an
        end echo line.

        Returns the path of the written script.
        '''
        t = _generate_template(commands)

        scriptFile = os.path.join(scriptsdir, name + '.sh')
        # `with` closes the handle even when writing the body raises.
        with open(scriptFile, 'w') as script:
            print >> script, '#!/bin/bash'
            print >> script, "echo ==========start %s at : `date` ==========" % name
            _script_append(script, t, JobParamList, paramDict)
            print >> script, ""
            print >> script, "echo ==========end %s at : `date` ========== " % name
        return scriptFile

    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        '''
        Write one '<SCRDIR>/<name>.sh' script per job dict.

        `paramDict` values, when given, are substituted into the command
        template first.  Every dict of `JobParamList` must provide
        'SCRDIR' and 'SAMPLE'; otherwise an error is logged and the
        process exits with status 1.

        Returns a bundle mapping each sample name to its script path.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)
        if paramDict:
            t = Template(t.safe_substitute(paramDict))

        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')

            if not scriptsdir or not sampleName:
                self.log.error(
                    "Error in step (%s) JobParamList(no SAMPLE or SCRDIR)." %
                    name)
                # Same exit status as before, without relying on the
                # interactive-only `exit` helper.
                raise SystemExit(1)

            scriptFile = os.path.join(scriptsdir, name + '.sh')
            scriptDict[sampleName] = scriptFile

            # `with` closes the handle even when writing the body raises.
            with open(scriptFile, 'w') as script:
                print >> script, '#!/bin/bash'
                print >> script, "echo ==========start %s at : `date` ==========" % name
                _script_append(script, t, paramDict=d)
                print >> script, ""
                print >> script, "echo ==========end %s at : `date` ========== " % name

        return scriptDict

    def write_Scripts(self,
                      name,
                      scriptsdir,
                      commands,
                      JobParamList=[],
                      paramDict={},
                      reducer=True):
        '''
        Write one '<scriptsdir>/<name>_<tag>.sh' script per job dict.

        <tag> is the dict's 'DATATAG' when it is set, otherwise the dict's
        position in `JobParamList`.  Each dict is updated in place with
        `paramDict` before the commands are rendered from it.  With
        `reducer` true only the rendered commands are written; otherwise
        they are wrapped in a bash header and start/end echo lines.

        Returns a bundle whose "script" entry lists the written paths.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)

        scriptDict["script"] = []
        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            # str() so a non-string DATATAG cannot break the file name.
            dataTag = str(d.get('DATATAG') or n)
            scriptFile = os.path.join(scriptsdir, name + '_' + dataTag + '.sh')
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if reducer:
                    print >> script, t.safe_substitute(d)
                else:
                    # The former format strings had two placeholders for a
                    # single argument and raised TypeError; the lines now
                    # match the other write_* methods.
                    print >> script, '#!/bin/bash'
                    print >> script, "echo ==========start %s at : `date` ==========" % name
                    print >> script, t.safe_substitute(d)
                    print >> script, "echo ==========end %s at : `date` ========== " % name

        return scriptDict
Exemple #8
0

class CLIError(Exception):
    '''Generic exception to raise and log different fatal errors.

    The text passed in is kept, prefixed with "E: ", in `msg`, which is
    also what str() of the exception returns.
    '''
    def __init__(self, msg):
        # super(CLIError) without an instance is an unbound super object,
        # so the former call never reached Exception.__init__.
        super(CLIError, self).__init__(msg)
        self.msg = "E: %s" % msg

    def __str__(self):
        return self.msg

    def __unicode__(self):
        return self.msg


# Module-level logger built by the project's Logger helper (argument
# meanings are defined there).
logger = Logger('log.txt', '2', 'Gaea', True).getlog()


def createWorkflowObject(workflowClass, state):
    '''Instantiate `workflowClass` with `state` and return the instance.'''
    return workflowClass(state)


def writeRunShell(gaeaShell, state):
    '''
    Write the run script to the path `gaeaShell`.

    The visible part emits the shebang, a line sourcing
    '<state.GAEA_HOME>/bin/activate', a `runtype` assignment and the start
    of a Usage() shell function.

    NOTE(review): only the beginning of this function is visible here; the
    rest of the script, and where `out` is closed, cannot be told from
    this excerpt.
    '''
    out = open(gaeaShell, 'w')
    print >> out, '#!/bin/sh'
    print >> out, "source %s/bin/activate" % state.GAEA_HOME
    # All command-line arguments of the generated script.
    print >> out, 'runtype=$*'
    print >> out, 'Usage()\n{'
    print >> out, '\techo "Usage: ./run.sh <options>"'
    print >> out, '\techo "       Run Tasks (Local) : ./run.sh"'
Exemple #9
0
class parseApp(object):
    '''
    classdocs
    '''

    def __init__(self):
        '''
        Create the empty bookkeeping containers and the workflow logger.
        '''
        # mkfile() stores into `files`, mkdir() adds to `directories`.
        self.files = {}
        self.allocDirectories = []
        self.directories = set()
        self.steps, self.services, self.notifications = [], [], []
        # Used by the methods below for error messages.
        self.log = Logger('log.txt','2','workflow',True).getlog()
    
    
    def delete_HDFS_dir(self, impl, hdfs_dir):
        cmdStr = "%s %s\n" % (impl.fs_cmd.exist, hdfs_dir)
        cmdStr += "if [ $? -ne 0 ]; then\n"
        cmdStr += "    %s %s\n" % (impl.fs_cmd.delete, hdfs_dir)
        cmdStr += "fi\n"
        return cmdStr
    
    def splitext(self, p):
        if not os.path.exists(p):
            return False
        else:
            return os.path.splitext(os.path.basename(p))
    
    def mkdir(self, *args):
        '''
        Build a path from `args` with _join_abs_file_name (defined
        elsewhere), record it in self.directories and return it.
        '''
        directory = _join_abs_file_name(*args)
        self.directories.add(directory)
        return directory
    
    def mkfile(self, name, data):
        if not os.path.isabs(name):
            raise RuntimeError('not an absolute path: %s' % name)
        name = os.path.normpath(name)
        self.files[name] = data
        return name 
    
    def paramRectify(self,paramStr,paramElem,mustBe=True):
        if mustBe:
            if paramStr.find(paramElem) == -1:
                paramStr += " %s " % paramElem
        elif paramStr.find(paramElem) != -1:
                return paramStr.replace('-I', '')
        return paramStr
            
    def fileAppend(self,fh,commands,JobParamList={}):
        '''
        Render `commands` once per entry of `JobParamList` and print each
        rendering to the open file handle `fh`.

        The template comes from _generate_template (defined elsewhere);
        placeholders without a value are left as they are
        (safe_substitute).
        '''
        template = _generate_template(commands)
        for job_params in JobParamList:
            rendered = template.safe_substitute(job_params)
            print >>fh, rendered
    
    def write_file(self,fileName,scriptsdir,commands,JobParamList=None,paramDict={},addShellHeader=False):
        '''
        Write `commands` into one or several files below `scriptsdir`.

        When `fileName` contains a ${KEY} placeholder and `JobParamList`
        is non-empty, one file is written per job dict: the dict is
        updated with `paramDict`, the file name and the commands are
        rendered from it, and an error is logged when the dict lacks KEY.
        Otherwise a single file named `fileName` is written through
        _script_append (defined elsewhere), optionally wrapped in a bash
        header and start/end echo lines.

        Returns a bundle whose "script" entry lists the written paths.
        Note that the job dicts in `JobParamList` are updated in place.
        '''
        scriptDict = bundle()
        scriptDict.script = []

        t = _generate_template(commands)
        m = re.match(r'.*\$\{(\S+)\}.*',fileName)

        if JobParamList and m:
            for d in JobParamList:
                if m.group(1) not in d:
                    self.log.error("Wrong about impl.write_file paramter: fileName. No %s in JobParamList." % m.group(1))
                if paramDict:
                    d.update(paramDict)
                file_name = _generate_template(fileName).safe_substitute(d)
                scriptFile = os.path.join(scriptsdir,file_name)
                scriptDict["script"].append(scriptFile)
                # Each per-job file is closed as soon as it is written.
                with open(scriptFile, 'w') as script:
                    print >>script, t.safe_substitute(d)
        else:
            scriptFile = os.path.join(scriptsdir,fileName)
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if addShellHeader:
                    title = os.path.splitext(fileName)[0]
                    print >>script, '#!/bin/bash'
                    print >>script, "echo ==========start %s at : `date` ==========" % title
                    _script_append(script, t, JobParamList, paramDict)
                    print >>script, "echo ==========end %s at : `date` ========== " % title
                else:
                    _script_append(script, t, JobParamList, paramDict)
        return scriptDict
        
    def write_shell(self, name, scriptsdir, commands, JobParamList=[], paramDict={}):
        '''
        Write '<scriptsdir>/<name>.sh': a bash header, a start echo line,
        the body produced by _script_append (defined elsewhere) from
        `commands`, `JobParamList` and `paramDict`, an empty line and an
        end echo line.

        Returns the path of the written script.
        '''
        t = _generate_template(commands)

        scriptFile = os.path.join(scriptsdir,name+'.sh')
        # `with` closes the handle even when writing the body raises.
        with open(scriptFile, 'w') as script:
            print >>script, '#!/bin/bash'
            print >>script, "echo ==========start %s at : `date` ==========" % name
            _script_append(script, t, JobParamList, paramDict)
            print >>script, ""
            print >>script, "echo ==========end %s at : `date` ========== " % name
        return scriptFile
    
    def write_scripts(self, name, commands, JobParamList=[], paramDict={}):
        '''
        Write one '<SCRDIR>/<name>.sh' script per job dict.

        Every dict of `JobParamList` must provide 'SCRDIR' and 'SAMPLE';
        otherwise an error is logged and the process exits with status 1.
        A valid dict is then updated in place with `paramDict` and handed
        to _script_append (defined elsewhere) to write the body.

        Returns a bundle mapping each sample name to its script path.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)

        for d in JobParamList:
            scriptsdir = d.get('SCRDIR')
            sampleName = d.get('SAMPLE')

            if not scriptsdir or not sampleName:
                self.log.error("Error in step (%s) JobParamList(no SAMPLE or SCRDIR)." % name)
                # Same exit status as before, without relying on the
                # interactive-only `exit` helper.
                raise SystemExit(1)

            if paramDict:
                d.update(paramDict)

            scriptFile = os.path.join(scriptsdir,name+'.sh')
            scriptDict[sampleName] = scriptFile

            # `with` closes the handle even when writing the body raises.
            with open(scriptFile, 'w') as script:
                print >>script, '#!/bin/bash'
                print >>script, "echo ==========start %s at : `date` ==========" % name
                _script_append(script, t, paramDict=d)
                print >>script, ""
                print >>script, "echo ==========end %s at : `date` ========== " % name

        return scriptDict
            
    def write_Scripts(self, name, scriptsdir, commands, JobParamList=[], paramDict={}, reducer=True):
        '''
        Write one '<scriptsdir>/<name>_<tag>.sh' script per job dict.

        <tag> is the dict's 'DATATAG' when it is set, otherwise the dict's
        position in `JobParamList`.  Each dict is updated in place with
        `paramDict` before the commands are rendered from it.  With
        `reducer` true only the rendered commands are written; otherwise
        they are wrapped in a bash header and start/end echo lines.

        Returns a bundle whose "script" entry lists the written paths.
        '''
        scriptDict = bundle()
        t = _generate_template(commands)

        scriptDict["script"] = []
        for n, d in enumerate(JobParamList):
            if paramDict:
                d.update(paramDict)
            # str() so a non-string DATATAG cannot break the file name.
            dataTag = str(d.get('DATATAG') or n)
            scriptFile = os.path.join(scriptsdir,name+'_'+  dataTag  +'.sh')
            scriptDict["script"].append(scriptFile)
            with open(scriptFile, 'w') as script:
                if reducer:
                    print >>script, t.safe_substitute(d)
                else:
                    # The former format strings had two placeholders for a
                    # single argument and raised TypeError; the lines now
                    # match the other write_* methods.
                    print >>script, '#!/bin/bash'
                    print >>script, "echo ==========start %s at : `date` ==========" % name
                    print >>script, t.safe_substitute(d)
                    print >>script, "echo ==========end %s at : `date` ========== " % name

        return scriptDict
Exemple #10
0
#!/usr/local/bin/python2.7
# encoding: utf-8
'''
Created on 2016年1月18日

@author: huangzhibo
'''
from gaeautils import  Logger, clean
from gaeautils.bundle import bundle
import json
import os
from configobj import ConfigObj

# Module-level logger built by the project's Logger helper (argument
# meanings are defined there).
logger = Logger('log.txt','2',"parseConfig",True).getlog()

def bundle_rcopy(cfg):
    '''
    Return a bundle holding a recursive copy of the mapping `cfg`.

    Nested dicts become nested bundles, lists and tuples are copied one
    level deep, and every other value is stored as is.
    '''
    copied = bundle()
    for key in cfg:
        value = cfg[key]
        if isinstance(value, dict):
            copied[key] = bundle_rcopy(value)
        elif isinstance(value, list):
            # New list, so the copy does not share it with `cfg`.
            copied[key] = list(value)
        elif isinstance(value, tuple):
            copied[key] = tuple(value)
        else:
            copied[key] = value

    return copied

def getAnalysisDict(analysis_flow):
    # NOTE(review): only the first statement of this function is visible
    # here; how `analysis_flow` is used cannot be told from this excerpt.
    # The graph starts with a single, empty 'init' entry.
    graph = bundle(init=bundle())
Exemple #11
0
#!/usr/bin/env python
# encoding: utf-8
from gaeautils import bundle, Logger, search_mod
import os

# Module-level logger built by the project's Logger helper (argument
# meanings are defined there).
logger = Logger('log.txt', '2', "parseSampleList", True).getlog()


class ParseSampleList(object):
    '''
    This class is used to parse sample list
    '''

    config = bundle()

    def __init__(self, sampleList, config):
        '''
        Keep the sample list and the configuration on the instance.

        `config` replaces, for this instance, the class-level `config`.
        '''
        self.sampleList, self.config = sampleList, config

    def rectify_gender(self, gender):
        '''
        Normalise a gender label: 'F' and 'female' give 'female', every
        other value gives 'male'.
        '''
        is_female = gender == 'F' or gender == 'female'
        return 'female' if is_female else 'male'

    def check_gender(self, sampleinfo, sampleName):
        sampleGender = ''
        for dataTag in sampleinfo: