def run_pipeline(self):
    """Run the pipeline stages in order: ingest, transform, then persist.

    Each stage object is created without arguments and its entry-point
    method is invoked without arguments. Nothing is returned.
    """
    print("Running Pipeline")

    # Stage 1: ingest
    ingestor = ingest.Ingest()
    ingestor.ingest_data()

    # Stage 2: transform
    transformer = transform.Transform()
    transformer.transform_data()

    # Stage 3: persist
    persister = persist.Persist()
    persister.persist_data()
Example #2
0
 def run_pipeline(self):
     """Run ingest, transform and persist, passing the data between stages.

     Every stage object is built with ``self.spark``. The ingested result and
     the transformed result are each displayed with ``show()`` before the
     transformed result is handed to the persist stage. Returns ``None``.
     """
     print("Running Pipeline")

     # Stage 1: ingest and display what was read
     ingestor = ingest.Ingest(self.spark)
     raw_df = ingestor.ingest_data()
     raw_df.show()

     # Stage 2: transform and display the outcome
     transformer = transform.Transform(self.spark)
     result_df = transformer.transform_data(raw_df)
     result_df.show()

     # Stage 3: persist the transformed result
     persister = persist.Persist(self.spark)
     persister.persist_data(result_df)
 def run_pipeline(self):
     """Run ingest, transform and persist, logging the start and the end.

     Every stage object is built with ``self.spark``. The ingested result and
     the transformed result are each displayed with ``show()`` before the
     transformed result is handed to the persist stage. Returns ``None``.
     """
     logging.info('run_pipeline method started')

     # Stage 1: ingest and display what was read
     ingestor = ingest.Ingest(self.spark)
     raw_df = ingestor.ingest_data()
     raw_df.show()

     # Stage 2: transform and display the outcome
     transformer = transform.Transform(self.spark)
     result_df = transformer.transform_data(raw_df)
     result_df.show()

     # Stage 3: persist the transformed result
     persister = persist.Persist(self.spark)
     persister.persist_data(result_df)

     logging.info('run_pipeline method ended')
    def run_pipeline(self):
        """Run ingest, transform and persist; exit the process on any failure.

        Every stage object is built with ``self.spark``. The ingested result
        and the transformed result are each displayed with ``show()`` before
        the transformed result is handed to the persist stage.

        Any ``Exception`` raised while the pipeline runs is logged at ERROR
        level together with its traceback, after which the process is
        terminated via ``sys.exit(1)``. On success the method returns ``None``.
        """
        try:
            logging.info('run_pipeline method started')

            ingest_process = ingest.Ingest(self.spark)
            df = ingest_process.ingest_data()
            df.show()

            transform_process = transform.Transform(self.spark)
            transformed_df = transform_process.transform_data(df)
            transformed_df.show()

            persist_process = persist.Persist(self.spark)
            persist_process.persist_data(transformed_df)

            logging.info('run_pipeline method ended')
        except Exception as exp:
            # logging.exception records the traceback alongside the message;
            # without it the failing stage is lost once the process exits.
            logging.exception(
                "An error occured while running the pipeline > %s", exp)
            # TODO: send email notification
            # TODO: log error to database
            sys.exit(1)
Example #5
0
    err = args.get_invalid_modules_in_args()
    if len(err) > 0:
        print("ERR - Unknow module(s)  : " + ','.join(err))
        sys.exit()

    # conf.yml,  conf.d/*.yml
    cmt.CONF = conf.load_conf()

    # if cron mode, introduce a small pause (offset) to spread the load on metrology servers
    if cmt.ARGS['cron']:
        mypause = conf.get_startoffset()
        time.sleep(mypause)

    # Persist
    cmt.PERSIST = persist.Persist(file=cmt.DEFAULT_PERSIST_FILE)
    if cmt.ARGS["nopersist"]:
        cmt.PERSIST.dict = {}
    lastrun = cmt.PERSIST.get_key("cmt_last_run", 0)

    # remote conf (url) or cached conf
    conf.load_conf_remote(cmt.CONF)

    # check master switch / CMT disabled ?
    ts_global_enable = cmt.CONF['global'].get('enable', 'no')
    if not conf.is_timeswitch_on(ts_global_enable):
        logit("CMT globally disabled by conf")
        sys.exit()

    # CLI : check config option ?
    if cmt.ARGS["checkconfig"]: