def LoadRes(self, etl_op, db_object_dict):
    """Load the staged input file into the target database.

    Derives ``db_table`` / ``db_table_partition`` from etl_op['db_path']
    ("<db>.<table>[.<partition>]") and delegates the actual load to
    ``self.db_obj.load()``.  Returns True on success, False on any failure
    (always logged, never raised).
    """
    try:
        self.logger("[%s] db LoadRes started" % self.name, 'info')
        args = copy.deepcopy(etl_op)
        args['tmp_dir'] = etl_op['tmpdir']
        if etl_op['db_type'] == 'mysql':
            # "<db>.<table>" is required here; a malformed path surfaces via
            # the outer handler, matching the original (unguarded) flow.
            args['db_table'] = etl_op['db_path'].split('.')[1]
        if etl_op['db_type'] in ('hive', 'hivecli'):
            # Missing pieces degrade to '' (replaces two bare excepts that
            # swallowed everything, including KeyboardInterrupt).
            parts = etl_op.get('db_path', '').split('.')
            args['db_table'] = parts[1] if len(parts) > 1 else ''
            args['db_table_partition'] = parts[2] if len(parts) > 2 else ''
        if not self.db_obj.load(self.infiledir + '/' + self.infilename, args):
            self.logger("[%s] db LoadRes exec failed" % self.name, 'error')
            return False
        self.logger("[%s] db LoadRes successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("LoadingEtl LoadRes", str(e)), 'error')
        self.logger("[%s] %s" % ("LoadingEtl LoadRes", Etltraceback()), 'error')
        return False
def GetSqlRes(self, etl_op, db_object_dict):
    """Persist the extraction result to this step's output file.

    Augments a deep copy of ``etl_op`` with per-db_type hints (mapred
    output dir, hive field counts/names) and hands it to
    ``self.db_obj.save()``.  Returns True on success, False otherwise.
    """
    try:
        self.logger("[%s] db getsqlres started" % self.name, 'info')
        args = copy.deepcopy(etl_op)
        args["tmpdir"] = etl_op['tmpdir']
        if etl_op['db_type'] == "mapred":
            # Pull the HDFS path that follows '-output' in the job command.
            # NOTE(review): if '-output' is absent, find() returns -1 and the
            # slice is wrong -- presumably callers guarantee its presence.
            sql = etl_op['sql']
            args["mapred_output_dir"] = \
                sql[sql.find('-output') + 7:].split(' ', 2)[1]
        if etl_op['db_type'] == "hive":
            args['fixedfield'] = etl_op.get('fixedfield', 0)
            args['dynamicfield'] = etl_op.get('dynamicfield', 0)
            try:
                args['tablefiledname'] = '|'.join(
                    str(x['tablefiledname']) for x in etl_op['sql_xml_deal'])
            except Exception:
                # Best-effort: tablefiledname stays unset when sql_xml_deal
                # is missing or malformed (was a bare except).
                pass
        if self.db_obj.save(self.outfiledir + '/' + self.outfilename, args):
            self.logger("[%s] db getsqlres successfully" % self.name, 'info')
            return True
        self.logger("[%s] db getsqlres failed" % self.name, 'error')
        return False
    except Exception as e:
        self.logger("[%s] %s" % ("ExtractionEtl GetSqlRes", str(e)), 'error')
        self.logger(
            "[%s] %s" % ("ExtractionEtl GetSqlRes", Etltraceback()), 'error')
        return False
def DbConnect(self, etl_op, db_object_dict):
    """Bind the extraction driver for etl_op['db_type'] and connect.

    Tries every entry in etl_op['db_coninfo']; succeeds if at least one
    connection works.  Sets ``self.db_obj`` as a side effect.  Returns
    True on success, False otherwise.
    """
    try:
        self.logger("[%s] db connect started" % self.name, 'info')
        if etl_op['db_type'] not in db_object_dict['Extraction']:
            # Bugfix: the message previously interpolated
            # etl_op['src_db_type'], which is not the key checked above and
            # may not even exist (raising KeyError and hiding this message).
            self.logger(
                '[%s] ComEtl not supported database [%s]'
                % (self.name, etl_op['db_type']), 'error')
            return False
        self.db_obj = db_object_dict['Extraction'][etl_op['db_type']]
        connum = len(etl_op["db_coninfo"])
        for coninfo in etl_op["db_coninfo"]:
            if not self.db_obj.connect(coninfo['db_ip'], coninfo['db_port'],
                                       coninfo['db_db'], coninfo['db_user'],
                                       coninfo['db_passwd']):
                connum -= 1
        if connum == 0:
            self.logger("[%s] src_db connect failed" % self.name, 'error')
            return False
        self.logger("[%s] db connect successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("ExtractionEtl DbConnect", str(e)), 'error')
        self.logger(
            "[%s] %s" % ("ExtractionEtl DbConnect", Etltraceback()), 'error')
        return False
def DbExecSql(self, etl_op, db_object_dict):
    """Assemble the extraction SQL and execute it, spooling to the outfile.

    The assembler named by etl_op['sql_assemble'] is looked up first in
    EtlUtility, then in a user-defined EtlUDF module of the same name.
    Returns True on success, False otherwise.
    """
    try:
        self.logger("[%s] db execsql started" % self.name, 'info')
        assemble_name = etl_op['sql_assemble']
        if assemble_name in EtlUtility.__dict__:
            assemble = EtlUtility.__dict__[assemble_name]
        else:
            # NOTE(review): '/' in a module name is unusual for __import__ --
            # presumably resolves via sys.path layout; confirm before changing.
            udf = __import__('EtlUDF/%s' % assemble_name)
            assemble = udf.__dict__[assemble_name]
        sql = assemble(etl_op, self.logger)
        self.logger("[%s] sql is [%s]" % (self.name, sql), 'info')
        if not self.db_obj.execute(
                sql, officialsql=True,
                outfile=self.outfiledir + '/' + self.outfilename):
            self.logger("[%s] db execsql failed" % self.name, 'error')
            return False
        self.logger("[%s] db execsql successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("ExtractionEtl DbExecSql", str(e)), 'error')
        self.logger(
            "[%s] %s" % ("ExtractionEtl DbExecSql", Etltraceback()), 'error')
        return False
def GetSqlRes(self, etl_op, db_object_dict):
    """Loading-side stub: this stage produces no SQL result.

    Logs a started/failed pair and always returns False.
    NOTE(review): LoadingEtl.run() never invokes this, so the hard-coded
    failure looks intentional -- confirm before wiring it into a pipeline.
    """
    try:
        self.logger("[%s] db getsqlres started" % self.name, 'info')
        self.logger("[%s] db getsqlres failed" % self.name, 'error')
        return False
    except Exception as e:
        self.logger("[%s] %s" % ("LoadingEtl GetSqlRes", str(e)), 'error')
        self.logger("[%s] %s" % ("LoadingEtl GetSqlRes", Etltraceback()), 'error')
        return False
def DbExecSql(self, etl_op, db_object_dict):
    """Loading-side stub: no SQL to execute at the load stage.

    Logs a started/successfully pair and always returns True so the
    LoadingEtl.run() chain can proceed.
    """
    try:
        self.logger("[%s] db execsql started" % self.name, 'info')
        self.logger("[%s] db execsql successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("LoadingEtl DbExecSql", str(e)), 'error')
        self.logger("[%s] %s" % ("LoadingEtl DbExecSql", Etltraceback()), 'error')
        return False
def LoadRes(self, etl_op, db_object_dict):
    """Extraction-side stub: nothing to load after extraction; always True.

    NOTE(review): the second message reads "exec started" where sibling
    methods log "successfully" at this point -- looks like a copy/paste
    slip; the message is left byte-identical pending confirmation.
    """
    try:
        self.logger("[%s] db LoadRes started" % self.name, 'info')
        self.logger("[%s] db LoadRes exec started" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("ExtractionEtl LoadRes", str(e)), 'error')
        self.logger("[%s] %s" % ("ExtractionEtl LoadRes", Etltraceback()),
                    'error')
        return False
def DbPreSql(self, etl_op, db_object_dict):
    """Run every statement in etl_op['pre_sql'] before the main load.

    Stops at the first failing statement and returns False; returns True
    once all statements succeed.
    """
    try:
        self.logger("[%s] db presql exec started" % self.name, 'info')
        for sql in etl_op["pre_sql"]:
            self.logger("[%s] db presql [%s]" % (self.name, sql), 'info')
            if not self.db_obj.execute(sql, presql=True):
                self.logger("[%s] db presql [%s] failed" % (self.name, sql),
                            'error')
                return False
        self.logger("[%s] db presql exec successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("LoadingEtl DbPreSql", str(e)), 'error')
        self.logger("[%s] %s" % ("LoadingEtl DbPreSql", Etltraceback()), 'error')
        return False
def run(self):
    """Execute the loading pipeline end to end.

    Runs DbConnect -> DbPreSql -> DbExecSql -> LoadRes -> DbPostSql,
    short-circuiting on the first failure (exactly like the original
    nested ifs).  Closes the db handle on both outcomes and returns
    True/False accordingly.
    """
    try:
        self.logger("[%s] etl started<-><-><-><-><->" % self.name, 'info')
        steps = (self.DbConnect, self.DbPreSql, self.DbExecSql,
                 self.LoadRes, self.DbPostSql)
        if all(step(self.etl_op, self.db_object_dict) for step in steps):
            self.logger("[%s] etl successfully #############" % self.name,
                        'info')
            self.db_obj.close()
            return True
        self.logger("[%s] etl failed" % self.name, 'error')
        # NOTE(review): if DbConnect failed, self.db_obj may be unset; the
        # resulting AttributeError is swallowed by the handler below, the
        # same as in the original code.
        self.db_obj.close()
        return False
    except Exception as e:
        self.logger("[%s] %s" % ("LoadingEtl run", str(e)), 'error')
        self.logger("[%s] %s" % ("LoadingEtl run", Etltraceback()), 'error')
        return False
def run(self):
    """Execute the transform stage.

    Delegates to self.Transform() with the configured etl_op and returns
    its boolean result, logging start/success/failure markers.
    """
    try:
        self.logger("[%s] TransformEtl started<-><-><-><-><->" % self.name,
                    'info')
        if self.Transform(self.etl_op, ''):
            self.logger(
                "[%s] TransformEtl successfully #############" % self.name,
                'info')
            return True
        self.logger("[%s] TransformEtl failed" % self.name, 'error')
        return False
    except Exception as e:
        self.logger("[%s] %s" % ("TransformEtl run", str(e)), 'error')
        self.logger("[%s] %s" % ("TransformEtl run", Etltraceback()), 'error')
        return False
def DbPostSql(self, etl_op, db_object_dict):
    """Run every statement in etl_op['post_sql'] after extraction.

    Stops at the first failing statement and returns False; returns True
    once all statements succeed.
    NOTE(review): sibling methods execute on ``self.db_obj`` (set by
    DbConnect) but this one uses ``self.src_db`` -- confirm which handle
    post_sql is meant to run on; left unchanged here.
    """
    try:
        self.logger("[%s] DbPostSql started" % self.name, 'info')
        for sql in etl_op["post_sql"]:
            self.logger("[%s] db postsql [%s]" % (self.name, sql), 'info')
            if not self.src_db.execute(sql, postsql=True):
                self.logger(
                    "[%s] postsql is [%s] failed" % (self.name, sql), 'error')
                return False
        self.logger("[%s] DbPostSql successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ('ExtractionEtl DbPostSql', str(e)), 'error')
        self.logger(
            "[%s] %s" % ("ExtractionEtl DbPostSql", Etltraceback()), 'error')
        return False
def Transform(self, etl_op, db_object_dict):
    """Run the configured transform over all declared data sources.

    Builds a {file_path: data_field} map from etl_op['data_source'],
    resolves the transform callable named by etl_op['data_transform_type']
    (EtlUtility first, then a user EtlUDF module), and invokes it with the
    step's output file.  Returns True on success, False otherwise.
    """
    try:
        self.logger("[%s] Transform started" % self.name, 'info')
        source_files = {}
        for source in etl_op['data_source']:
            if source.get('path', '').strip() != '':
                source_files[source['path']] = source['data_field']
            else:
                # No explicit path: derive the intermediate file produced by
                # an earlier job/step for the same date/hour.
                tmp_file = '%s/%s_%s_%s_%s_%s' % (
                    self.outfiledir, self.conf_name,
                    source.get('job_name', etl_op['job_name']),
                    source['step_name'], etl_op['date'], etl_op['hour'])
                source_files[tmp_file] = source['data_field']
        args = copy.deepcopy(etl_op)
        outfile = self.outfiledir + '/' + self.outfilename
        if etl_op['data_transform_type'] in EtlUtility.__dict__:
            transform = EtlUtility.__dict__[etl_op['data_transform_type']]
        else:
            # NOTE(review): '/' in a module name is unusual for __import__ --
            # presumably resolves via sys.path layout; confirm before changing.
            udf = __import__('EtlUDF/%s' % etl_op['data_transform_type'])
            transform = udf.__dict__[etl_op['data_transform_type']]
        if not transform(source_files, outfile, self.logger, args):
            self.logger("[%s] Transform failed" % self.name, 'error')
            return False
        self.logger("[%s] Transform successfully" % self.name, 'info')
        return True
    except Exception as e:
        self.logger("[%s] %s" % ("TransformEtl Transform", str(e)), 'error')
        self.logger("[%s] %s" % ("TransformEtl Transform", Etltraceback()),
                    'error')
        return False