def getstatus():
    """Return a JSON dump of every ETL run together with its module runs.

    The result is a JSON array with one object per ``ETLRun`` row. Each
    object carries the run's ``id``, ``name``, ``start``, ``end`` and
    ``status`` attributes plus a ``modules`` list holding one object per
    ``ETLModuleRun`` row whose ``etl_run_id`` matches the run, ordered by
    module-run id. Every attribute value is passed through ``str()``, so
    all values in the output are strings (a ``None`` attribute becomes the
    string ``"None"``).

    Returns:
        str: the JSON-encoded status list.
    """
    run_fields = ["id", "name", "start", "end", "status"]
    module_fields = [
        "id", "task_id", "name", "input", "output", "conf",
        "start", "end", "status"
    ]

    sess = scheduler_db_mgmt.get_session()
    try:
        ret = []
        for er in sess.query(ETLRun).all():
            etlrun_data = {a: str(getattr(er, a)) for a in run_fields}
            module_runs = sess.query(ETLModuleRun).filter(
                ETLModuleRun.etl_run_id == er.id).order_by(
                    ETLModuleRun.id).all()
            etlrun_data["modules"] = [
                {a: str(getattr(emr, a)) for a in module_fields}
                for emr in module_runs
            ]
            ret.append(etlrun_data)
    finally:
        # Release the session even when a query or attribute access fails.
        sess.close()
    return json.dumps(ret)
Exemple #2
0
def start_etl(etl_run_name):
    """Register a new ETL run and launch its chain of module tasks.

    The module sequence is read from ``<etl_sequences_dir>/<etl_run_name>.json``,
    which is iterated as a list of dicts. Each dict must contain
    ``"module_name"`` and may contain ``"input"``, ``"output"`` and
    ``"conf"``. One ``ETLRun`` row and one ``ETLModuleRun`` row per sequence
    entry are created with status ``"PENDING"``. Each entry is then passed,
    together with the id of its module-run row, as keyword arguments to an
    immutable ``exec_module`` signature. The signatures are chained in file
    order and followed by ``end_etl`` for the run.

    Args:
        etl_run_name: name of the run; also the base name of the sequence
            file.

    Raises:
        IOError/OSError: if the sequence file cannot be opened.
        ValueError: if the sequence file is not valid JSON.
        KeyError: if a sequence entry has no ``"module_name"``.
    """
    # Read the sequence before touching the database so that a missing or
    # malformed file does not leave an orphan PENDING run behind.
    etl_sequence_path = "%s/%s.json" % (etl_sequences_dir, etl_run_name)
    with open(etl_sequence_path) as sequence_file:
        etl_sequence = json.load(sequence_file)

    sess = scheduler_db_mgmt.get_session()
    try:
        etl_run = ETLRun(name=etl_run_name, start=datetime.datetime.now(), status="PENDING")
        sess.add(etl_run)
        sess.commit()
        etl_run_id = etl_run.id

        for step in etl_sequence:
            etl_module_run = ETLModuleRun(
                name=step["module_name"],
                input=step.get("input", None),
                output=step.get("output", None),
                conf=step.get("conf", None),
                status="PENDING",
                etl_run_id=etl_run_id
            )
            sess.add(etl_module_run)
            # Committed per module so the generated id can be read back and
            # handed to the task.
            sess.commit()
            step["etl_module_run_id"] = etl_module_run.id

        # Left-fold the module signatures into a chain (same association
        # order as reduce(lambda c, p: c | p, ...)), then append end_etl.
        # An empty sequence yields just the end_etl signature instead of
        # failing on an empty reduce.
        taskchain = None
        for step in etl_sequence:
            signature = exec_module.si(**step)
            taskchain = signature if taskchain is None else taskchain | signature
        finalizer = end_etl.si(etl_run_id=etl_run_id)
        taskchain = finalizer if taskchain is None else taskchain | finalizer
    finally:
        sess.close()

    taskchain()
Exemple #3
0
def end_etl(etl_run_id):
    """Mark an ETL run as successfully finished.

    Loads the ``ETLRun`` row with primary key ``etl_run_id``, sets its
    ``end`` to the current local time and its ``status`` to ``"SUCCESS"``,
    and commits the change.

    Args:
        etl_run_id: primary key of the ``ETLRun`` row to update.

    Raises:
        AttributeError: if the lookup returns ``None`` (no such run), since
            the attributes are then assigned on ``None``.
    """
    sess = scheduler_db_mgmt.get_session()
    try:
        etl_run = sess.query(ETLRun).get(etl_run_id)
        etl_run.end = datetime.datetime.now()
        etl_run.status = "SUCCESS"
        sess.add(etl_run)
        sess.commit()
    finally:
        # Always release the session, including when the lookup or the
        # commit fails.
        sess.close()
Exemple #4
0
def error_handler(failed_etl_module_run_id):
    """Mark the ETL run that owns a failed module run as failed.

    Looks up the ``ETLModuleRun`` row with primary key
    ``failed_etl_module_run_id``, follows its ``etl_run_id`` to the parent
    ``ETLRun`` row, sets that run's ``status`` to ``"FAILURE"`` and its
    ``end`` to the current local time, and commits the change.

    Args:
        failed_etl_module_run_id: primary key of the module run that failed.

    Raises:
        AttributeError: if either lookup returns ``None``.
    """
    sess = scheduler_db_mgmt.get_session()
    try:
        etl_module_run = sess.query(ETLModuleRun).get(failed_etl_module_run_id)
        etl_run = sess.query(ETLRun).get(etl_module_run.etl_run_id)
        etl_run.status = "FAILURE"
        etl_run.end = datetime.datetime.now()
        sess.add(etl_run)
        sess.commit()
    finally:
        # Always release the session, including when a lookup or the commit
        # fails.
        sess.close()
def getstatus():
    """Serialize all ETL runs and their module runs to a JSON string.

    For each ``ETLRun`` row the output contains an object with the
    stringified ``id``, ``name``, ``start``, ``end`` and ``status``
    attributes and a ``modules`` list. That list has one object per
    ``ETLModuleRun`` row belonging to the run (matched on ``etl_run_id``,
    ordered by module-run id) with its stringified ``id``, ``task_id``,
    ``name``, ``input``, ``output``, ``conf``, ``start``, ``end`` and
    ``status`` attributes. Because every value goes through ``str()``, a
    ``None`` attribute is emitted as the string ``"None"``.

    Returns:
        str: the JSON-encoded list of runs.
    """
    run_attrs = ["id", "name", "start", "end", "status"]
    module_attrs = ["id", "task_id", "name", "input", "output", "conf", "start", "end", "status"]

    sess = scheduler_db_mgmt.get_session()
    try:
        ret = []
        for er in sess.query(ETLRun).all():
            etlrun_data = {a: str(getattr(er, a)) for a in run_attrs}
            module_query = sess.query(ETLModuleRun).filter(ETLModuleRun.etl_run_id == er.id)
            etlrun_data["modules"] = [
                {a: str(getattr(emr, a)) for a in module_attrs}
                for emr in module_query.order_by(ETLModuleRun.id).all()
            ]
            ret.append(etlrun_data)
    finally:
        # Close the session on every path so a failing query does not leak it.
        sess.close()
    return json.dumps(ret)
Exemple #6
0
def exec_module(self, etl_module_run_id=None, module_name=None, input=None, output=None, conf=None, task_id=None):
    """Run one ETL module as a subprocess and record its outcome.

    Steps, in order:

    1. Mark the ``ETLModuleRun`` row ``etl_module_run_id`` as ``"STARTED"``
       (storing ``task_id`` and the start time) and commit. This uses its
       own session, which is closed before the subprocess starts.
    2. Look up ``module_name`` in the JSON document at ``modules_file`` and
       take its ``"cmd"`` entry as the executable. A relative command is
       resolved against the directory containing ``modules_file``.
    3. Run the command, passing ``--input``, ``--output`` and ``--conf``
       for each of those arguments that is truthy.
    4. Store the end time and ``"SUCCESS"`` (exit code 0) or ``"FAILURE"``
       (any other exit code) on the row, and mirror that state through
       ``self.update_state``.

    If ``debug_slowdown`` is truthy the function sleeps that many seconds
    between steps.

    Args:
        self: object providing ``update_state`` and ``request``; presumably
            the bound Celery task -- confirm against the decorator, which is
            not visible here.
        etl_module_run_id: primary key of the ``ETLModuleRun`` row to update.
        module_name: key of the module entry in ``modules_file``.
        input: value for ``--input``; omitted when falsy.
        output: value for ``--output``; omitted when falsy.
        conf: value for ``--conf``; omitted when falsy.
        task_id: value stored in the row's ``task_id`` column.

    Returns:
        The id of the ``ETLModuleRun`` row that was updated.

    Raises:
        KeyError: if ``module_name`` or its ``"cmd"`` entry is missing from
            the modules file.
        OSError: if the command cannot be started.
    """
    if debug_slowdown:
        time.sleep(debug_slowdown)

    print (module_name, input, output, conf, task_id)

    # Record the start in a short-lived session so that no database session
    # stays open while the (possibly long-running) subprocess executes.
    sess = scheduler_db_mgmt.get_session()
    try:
        etl_module_run = sess.query(ETLModuleRun).get(etl_module_run_id)
        etl_module_run.task_id = task_id
        etl_module_run.start = datetime.datetime.now()
        etl_module_run.status = "STARTED"
        sess.add(etl_module_run)
        sess.commit()
    finally:
        sess.close()

    with open(modules_file) as modules_fh:
        modules = json.load(modules_fh)
    module_cmd = modules[module_name]["cmd"]
    if not os.path.isabs(module_cmd):
        module_cmd = os.path.abspath(os.path.join(os.path.dirname(modules_file), module_cmd))

    module_params = []
    if input:
        module_params += ["--input", input]
    if output:
        module_params += ["--output", output]
    if conf:
        module_params += ["--conf", conf]

    if debug_slowdown:
        time.sleep(debug_slowdown)

    # NOTE(review): an exception raised from here on (unknown module, command
    # not found) leaves the row in "STARTED"; only a non-zero exit code is
    # recorded as a failure.
    exit_code = subprocess.call([module_cmd] + module_params)

    if debug_slowdown:
        time.sleep(debug_slowdown)

    sess = scheduler_db_mgmt.get_session()
    try:
        etl_module_run = sess.query(ETLModuleRun).get(etl_module_run_id)
        etl_module_run.end = datetime.datetime.now()

        if exit_code == 0:
            print("Sucess!")
            self.update_state(state='SUCCESS')
            etl_module_run.status = 'SUCCESS'
        else:
            print("Fail!")
            self.update_state(state='FAILURE')
            etl_module_run.status = 'FAILURE'
            # Replace this request's callbacks with error_handler; presumably
            # this makes error_handler receive the returned module-run id
            # instead of the next task in the chain -- confirm against the
            # Celery version in use.
            self.request.callbacks = [error_handler]

        etl_module_run_id = etl_module_run.id
        sess.add(etl_module_run)
        sess.commit()
    finally:
        sess.close()

    if debug_slowdown:
        time.sleep(debug_slowdown)

    return etl_module_run_id