Example #1
File: ComEtl.py  Project: zhuyeqing/ComETL
def GetConf(conf_file):
    try:
        f = open("%s" % (conf_file), "r")
        str_op_conf = f.read()
        op_conf = pickle.loads(str_op_conf)
        f.close()
        return op_conf
    except Exception, e:
        log("=========unknown error at GetConf--%s" % str(e), "error")
        return None
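On Python 3 the same loader would use a context manager and binary mode, since pickle payloads are bytes. A minimal sketch, keeping the project's log(message, level) contract and, like the original, trusting the pickle file (never unpickle untrusted data):

import pickle

def GetConf(conf_file):
    try:
        # "rb": pickle data is binary; the with-block closes the file even on error.
        with open(conf_file, "rb") as f:
            return pickle.load(f)
    except Exception as e:
        log("=========unknown error at GetConf--%s" % e, "error")
        return None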
Example #3
File: ComEtl.py  Project: zhuyeqing/ComETL
def FailedRecover(conffile, runtime, run_log):
    try:
        str_op_conf = pickle.dumps(run_log)

        failed_file = "failedrecover_%s_%s" % (conffile, runtime)
        folder = conf.basedir + "/failedrecover/"
        os.system("mkdir -p %s" % folder)
        f = open("%s%s" % (folder, failed_file), "w")
        f.write(str_op_conf)
        f.close()
    except Exception, e:
        log("unknown error at FailedRecover--%s" % str(e), "error")
        return False
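A Python 3 sketch of the same recovery dump, replacing the shelled-out mkdir -p with os.makedirs and writing the pickle in binary mode. Unlike the original, which falls off the end and implicitly returns None on success, this returns True:

import os
import pickle

def FailedRecover(conffile, runtime, run_log):
    try:
        folder = os.path.join(conf.basedir, "failedrecover")
        os.makedirs(folder, exist_ok=True)  # replaces os.system("mkdir -p %s" % folder)
        failed_file = "failedrecover_%s_%s" % (conffile, runtime)
        with open(os.path.join(folder, failed_file), "wb") as f:
            pickle.dump(run_log, f)
        return True
    except Exception as e:
        log("unknown error at FailedRecover--%s" % e, "error")
        return False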
def getMaxDownloadMethod(self, currentTime, currentDay):
    maxDownloadSpeed = 0

    if self.sabSpeedOverride is not None and self.sabSpeedOverride.isSpeedOverridden():
        maxDownloadSpeed = self.sabSpeedOverride.getOverriddenDownloadSpeed()
    else:
        if NIGHT_TIME_START < currentTime < NIGHT_TIME_STOP:
            log(currentTime, " - It's night time.")
            maxDownloadSpeed = HIGH_SPEED_LIMIT
        elif WORK_TIME_START < currentTime < WORK_TIME_STOP:
            log(currentTime, " - It's work time.")
            maxDownloadSpeed = HIGH_SPEED_LIMIT
        else:
            log(currentTime, " - We're awake and at home.")
            maxDownloadSpeed = LOW_SPEED_LIMIT

        # 5 and 6 = Saturday and Sunday
        if currentDay in (5, 6) and not (NIGHT_TIME_START < currentTime < NIGHT_TIME_STOP):
            log("It's the weekend (", currentDay, ") and during the day.")
            maxDownloadSpeed = LOW_SPEED_LIMIT
        else:
            log("It's a weekday (", currentDay, ").")

    return maxDownloadSpeed
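The chained time comparisons above only work if the schedule boundaries are comparable to datetime.time values. The constants are not shown in the excerpt; a plausible definition, with the actual times being assumptions:

import datetime

# Assumed schedule boundaries -- the real values are not in the source.
NIGHT_TIME_START = datetime.time(0, 30)   # 00:30
NIGHT_TIME_STOP = datetime.time(7, 0)     # 07:00
WORK_TIME_START = datetime.time(9, 0)     # 09:00
WORK_TIME_STOP = datetime.time(17, 30)    # 17:30

HIGH_SPEED_LIMIT = 2000  # kb/s while everyone is asleep or out
LOW_SPEED_LIMIT = 400    # kb/s while the connection is shared

datetime.time objects compare chronologically, so NIGHT_TIME_START < currentTime < NIGHT_TIME_STOP holds exactly when currentTime falls inside that window (the window must not span midnight for this to work).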
def printStartupConfiguration():
    log("--------------------------------")
    log("Initalising SAB Speed Monitor...")
    log("--------------------------------\n")

    log("Jas SAB Server: ", JAS_SAB)
    log("Rich SAB Server: ", RIC_SAB)

    log("High speed limit: ", HIGH_SPEED_LIMIT, "kb/s - Times: ", NIGHT_TIME_START, "->", NIGHT_TIME_STOP, " and ", WORK_TIME_START, "->", WORK_TIME_STOP)
    log("Low speed limit: ", LOW_SPEED_LIMIT, "kb/s at all other times\n\n")
Example #7
File: ComEtl.py  Project: zhuyeqing/ComETL
def EtlSched(etlclass, conffile, timerange, debugmode, job_name, process):
    conf_model_path = conf.basedir + "/etl_conf"
    etl_model_path = conf.basedir + "/etl"
    pdbc_model_path = conf.basedir + "/pdbc"

    if sys.path.count(conf_model_path) == 0:
        sys.path.append(conf_model_path)
    if sys.path.count(etl_model_path) == 0:
        sys.path.append(etl_model_path)
    if sys.path.count(pdbc_model_path) == 0:
        sys.path.append(pdbc_model_path)

    db_object_dict = {"loading": {}, "Extraction": {}}
    for db_type in conf.db_info.keys():
        tmp_db = imp.load_source("tmp_db", pdbc_model_path + "/" + conf.db_info[db_type]["model"])
        if tmp_db.__dict__.has_key(conf.db_info[db_type]["classname"]):
            tmp_db_class = tmp_db.__dict__[conf.db_info[db_type]["classname"]]
            if conf.db_info[db_type]["support"] == "Loading":
                db_object_dict["loading"][db_type] = tmp_db_class(log)
            elif conf.db_info[db_type]["support"] == "Extraction":
                db_object_dict["Extraction"][db_type] = tmp_db_class(log)
            else:
                db_object_dict["Extraction"][db_type] = tmp_db_class(log)
                db_object_dict["loading"][db_type] = tmp_db_class(log)

    etl_conf = imp.load_source("etl_conf", conf_model_path + "/" + conffile + ".py")
    timelist = []
    if etl_conf.etl_op.get("run_mode", "day") == "day":
        timelist = ProTimeRange(timerange)
    elif etl_conf.etl_op.get("run_mode", "day") == "hour":
        timelist = ProTimeRangeHour(timerange, etl_conf.etl_op.get("delay_hours", 2))
    else:
        log("[%s] unknown run_mode %s" % (conffile, etl_conf.etl_op.get("run_mode", "day")), "error")
        return False

    etl_res = True
    for runtime in timelist:
        etl_op = copy.deepcopy(etl_conf.etl_op)
        global_args = {}
        global_args["conffile"] = conffile
        global_args["tmpdir"] = conf.tmp_dir + "/" + conffile
        global_args["tmp_dir"] = global_args["tmpdir"]
        global_args["date"] = runtime[:8]
        global_args["hour"] = runtime[8:10] or "23"

        run_log = {}
        RunLogInit(run_log, etl_op, runtime, conffile)

        for job in etl_op["jobs"]:
            if job_name:
                if job["job_name"] != job_name:
                    continue

            for i in range(len(job["analysis"])):
                if job["analysis"][i]["db_type"] in ["hive", "hivecli"]:
                    job["analysis"][i]["pre_sql"].insert(0, "set mapred.job.queue.name=%s" % conf.mapred_queue)
                if job["analysis"][i]["db_type"] in ["mapred"]:
                    job["analysis"][i]["sql"] = (
                        job["analysis"][i]["sql"] + " -jobconf mapred.job.queue.name=%s " % conf.mapred_queue
                    )

            action_list = []
            action_list.extend(job["analysis"])
            action_list.extend(job["transform"])
            action_list.extend(job["loading"])
            for action in action_list:
                action["job_name"] = job["job_name"]
                action.update(global_args)
                etlclass = action.get("etl_class_name", "")
                if process:
                    if action["step_name"] == process:
                        etlmodel = imp.load_source("cometl", etl_model_path + "/" + etlclass + ".py")
                        cometl = etlmodel.__dict__[etlclass](action, db_object_dict, log, conffile, debugmode)
                        if cometl.run():
                            log(
                                "[%s-%s-%s-%s] success" % (conffile, job["job_name"], action["step_name"], runtime),
                                "info",
                            )
                            return (True, "")
                        else:
                            log(
                                "[%s-%s-%s-%s] failed" % (conffile, job["job_name"], action["step_name"], runtime),
                                "error",
                            )
                            # op_conf={"etlclass":etlclass,"conffile":conffile,"timerange":runtime,"process":action["step_name"],"debugmode":debugmode}
                            # FailedRecover(op_conf)
                            # dqsvalue = {'type':'job','title':'ETL','program':'%s_%s_%s' % (conffile,action["step_name"],runtime),'state':0,'msg':'%s_%s_%s_etl_failed' % (conffile,action["step_name"],runtime)}
                            # DqsAlarm.DQSAlarm('job',dqsvalue)
                            return (False, "%s_%s_%s_%s" % (conffile, job["job_name"], action["step_name"], runtime))
                else:
                    if run_log[job["job_name"]][action["step_name"]] == "ok":
                        continue
                    etlmodel = imp.load_source("cometl", etl_model_path + "/" + etlclass + ".py")
                    cometl = etlmodel.__dict__[etlclass](action, db_object_dict, log, conffile, debugmode)
                    if cometl.run():
                        log("[%s-%s-%s-%s] success" % (conffile, job["job_name"], action["step_name"], runtime), "info")
                        run_log[job["job_name"]][action["step_name"]] = "ok"
                    else:
                        run_log[job["job_name"]][action["step_name"]] = "failed"
                        log("[%s-%s-%s-%s] failed" % (conffile, job["job_name"], action["step_name"], runtime), "error")
                        # op_conf={"etlclass":etlclass,"conffile":conffile,"timerange":runtime,"process":action["step_name"],"debugmode":debugmode}
                        if job_name:
                            return (False, "%s_%s_%s_%s" % (conffile, job["job_name"], action["step_name"], runtime))
                        else:
                            FailedRecover(conffile, runtime, run_log)
                            # dqsvalue = {'type':'job','title':'ETL','program':'%s_%s_%s_%s' % (conffile,job['job_name'],action["step_name"],runtime),'state':0,'msg':'%s_%s_%s_etl_failed' % (conffile,action["step_name"],runtime)}
                            # DqsAlarm.DQSAlarm('job',dqsvalue)
                            return (False, "%s_%s_%s_%s" % (conffile, job["job_name"], action["step_name"], runtime))

    return (True, "")
def startControllingSabSpeeds(self):
    while True:
        currentTime = datetime.datetime.now().time()
        currentDay = datetime.datetime.now().weekday()

        try:
            jasSABSpeed = getCurrentDownloadSpeed(JAS_SAB, JAS_SAB_API_KEY)
        except IOError:
            log("**Exception**", currentTime, "I/O error trying to access Jas's SAB.")
            time.sleep(TIME_BETWEEN_SAB_SPEED_CHECKS_IF_EXCEPTION)
            continue

        try:
            richSABSpeed = getCurrentDownloadSpeed(RIC_SAB, RIC_SAB_API_KEY)
        except IOError:
            log("**Exception**", currentTime, "I/O error trying to access Rich's SAB.")
            time.sleep(TIME_BETWEEN_SAB_SPEED_CHECKS_IF_EXCEPTION)
            continue

        log("Jas SAB current speed:", jasSABSpeed, "kb/s\t\tRich SAB current speed:", richSABSpeed, "kb/s")

        maxDownloadSpeed = self.getMaxDownloadMethod(currentTime, currentDay)

        log("MaxSpeed: ", maxDownloadSpeed)

        if not self.isSabActive(jasSABSpeed) and not self.isSabActive(richSABSpeed):
            log("Both servers idle. Standby.")
        elif not self.isSabActive(jasSABSpeed):
            # Only Rich's server is downloading, so it gets the whole allowance.
            log("Set Rich's server to", maxDownloadSpeed, "kb/s")
            setNewDownloadSpeed(RIC_SAB, RIC_SAB_API_KEY, maxDownloadSpeed)
        elif not self.isSabActive(richSABSpeed):
            # Only Jas's server is downloading.
            log("Set Jas's server to", maxDownloadSpeed, "kb/s")
            setNewDownloadSpeed(JAS_SAB, JAS_SAB_API_KEY, maxDownloadSpeed)
        else:
            # Both are downloading: split the allowance evenly.
            log("Aggregate", maxDownloadSpeed, "kb/s evenly across both servers")

            aggregatedSpeed = maxDownloadSpeed / 2

            setNewDownloadSpeed(RIC_SAB, RIC_SAB_API_KEY, aggregatedSpeed)
            setNewDownloadSpeed(JAS_SAB, JAS_SAB_API_KEY, aggregatedSpeed)

        print("\n")

        time.sleep(TIME_BETWEEN_SAB_SPEED_CHECKS)
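getCurrentDownloadSpeed and setNewDownloadSpeed are not shown either. SABnzbd exposes both operations over its HTTP API (mode=queue reports the current rate in its kbpersec field; mode=config&name=speedlimit sets the limit), so a plausible Python 3 sketch looks like the following. Treating JAS_SAB/RIC_SAB as base URLs is an assumption about how the constants are stored:

import json
import urllib.request

def getCurrentDownloadSpeed(server, api_key):
    # server is assumed to be a base URL such as "http://192.168.0.10:8080".
    url = "%s/sabnzbd/api?mode=queue&output=json&apikey=%s" % (server, api_key)
    with urllib.request.urlopen(url, timeout=10) as resp:
        return float(json.load(resp)["queue"]["kbpersec"])

def setNewDownloadSpeed(server, api_key, speed):
    url = "%s/sabnzbd/api?mode=config&name=speedlimit&value=%d&apikey=%s" % (
        server, int(speed), api_key)
    urllib.request.urlopen(url, timeout=10).read()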