Exemple #1
0
 def LoadRes(self,etl_op,db_object_dict):
     """Load the staged input file into the connected database object.

     The arguments passed to ``self.db_obj.load`` are a deep copy of
     ``etl_op`` extended with ``tmp_dir`` and, depending on ``db_type``,
     ``db_table`` / ``db_table_partition`` taken from the dot-separated
     ``db_path`` (second and third components respectively).

     Args:
         etl_op: step configuration dict; reads ``tmpdir``, ``db_type``
             and, for mysql/hive/hivecli, ``db_path``.
         db_object_dict: not used by this method.

     Returns:
         True if ``self.db_obj.load`` reports success; False if it reports
         failure or if any exception is raised (the exception is logged,
         not propagated).
     """
     try:
         self.logger("[%s] db LoadRes started" % self.name,'info')

         args = copy.deepcopy(etl_op)
         args['tmp_dir'] = etl_op['tmpdir']
         db_type = etl_op['db_type']
         if db_type == 'mysql':
             # A db_path without a '.' raises IndexError here, which the
             # outer handler turns into a logged failure.
             args['db_table'] = etl_op['db_path'].split('.')[1]
         if db_type in ('hive','hivecli'):
             # Best effort: a missing or short db_path yields empty names
             # rather than failing the step.
             try:
                 path_parts = etl_op['db_path'].split('.')
             except Exception:
                 path_parts = []
             args['db_table'] = path_parts[1] if len(path_parts) > 1 else ''
             args['db_table_partition'] = path_parts[2] if len(path_parts) > 2 else ''

         if not self.db_obj.load(self.infiledir + '/' + self.infilename,args):
             self.logger("[%s] db LoadRes exec failed" % self.name,'error')
             return False

         self.logger("[%s] db LoadRes successfully" % self.name,'info')
         return True
     except Exception as e:
         self.logger("[%s] %s" % ("LoadingEtl LoadRes",str(e)),'error')
         self.logger("[%s] %s" % ("LoadingEtl LoadRes",Etltraceback()),'error')
         return False
Exemple #2
0
    def GetSqlRes(self, etl_op, db_object_dict):
        """Save the query result of the connected database to the output file.

        The arguments passed to ``self.db_obj.save`` are a deep copy of
        ``etl_op`` plus a few keys derived from it, depending on ``db_type``.

        Args:
            etl_op: step configuration dict; reads ``tmpdir``, ``db_type``,
                ``sql`` (mapred only) and the optional ``fixedfield``,
                ``dynamicfield`` and ``sql_xml_deal`` entries (hive only).
            db_object_dict: not used by this method.

        Returns:
            True if ``self.db_obj.save`` reports success; False if it reports
            failure or if any exception is raised (logged, not propagated).
        """
        try:
            self.logger("[%s] db getsqlres started" % self.name, 'info')

            args = copy.deepcopy(etl_op)
            args["tmpdir"] = etl_op['tmpdir']
            db_type = etl_op['db_type']
            if db_type == "mapred":
                # Take the token after '-output' (7 characters long) in the
                # command line.
                # NOTE(review): if '-output' is absent, find() returns -1 and
                # the slice starts at index 6, giving a meaningless value.
                sql = etl_op['sql']
                after_flag = sql[sql.find('-output') + 7:]
                args["mapred_output_dir"] = after_flag.split(' ', 2)[1]
            if db_type == "hive":
                args['fixedfield'] = etl_op.get('fixedfield', 0)
                args['dynamicfield'] = etl_op.get('dynamicfield', 0)
                try:
                    args['tablefiledname'] = '|'.join(
                        str(x['tablefiledname'])
                        for x in etl_op['sql_xml_deal'])
                except Exception:
                    # Best effort: without usable sql_xml_deal entries the
                    # key is simply left unset.
                    pass

            if self.db_obj.save(self.outfiledir + '/' + self.outfilename,
                                args):
                self.logger("[%s] db getsqlres successfully" % self.name,
                            'info')
                return True

            self.logger("[%s] db getsqlres failed" % self.name, 'error')
            return False
        except Exception as e:
            self.logger("[%s] %s" % ("ExtractionEtl GetSqlRes", str(e)),
                        'error')
            self.logger(
                "[%s] %s" % ("ExtractionEtl GetSqlRes", Etltraceback()),
                'error')
            return False
Exemple #3
0
    def DbConnect(self, etl_op, db_object_dict):
        """Select the extraction database object and connect it.

        Looks up ``etl_op['db_type']`` in ``db_object_dict['Extraction']``,
        stores the match on ``self.db_obj`` and calls its ``connect`` once
        for every entry of ``etl_op['db_coninfo']``.

        Args:
            etl_op: step configuration dict; reads ``db_type`` and
                ``db_coninfo`` (an iterable of dicts with ``db_ip``,
                ``db_port``, ``db_db``, ``db_user`` and ``db_passwd``).
            db_object_dict: dict whose ``'Extraction'`` entry maps database
                type names to database objects.

        Returns:
            True if at least one connection attempt succeeds; False if the
            database type is unsupported, every attempt fails (including an
            empty ``db_coninfo``), or an exception is raised (logged, not
            propagated).
        """
        try:
            self.logger("[%s] db connect started" % self.name, 'info')

            extraction_dbs = db_object_dict['Extraction']
            db_type = etl_op['db_type']
            if db_type not in extraction_dbs:
                # Report the same key that was just looked up.
                self.logger(
                    '[%s] ComEtl not supported database [%s]' %
                    (self.name, db_type), 'error')
                return False
            self.db_obj = extraction_dbs[db_type]

            # Every entry is attempted; one successful connect is enough.
            connected = 0
            for coninfo in etl_op["db_coninfo"]:
                if self.db_obj.connect(coninfo['db_ip'], coninfo['db_port'],
                                       coninfo['db_db'], coninfo['db_user'],
                                       coninfo['db_passwd']):
                    connected += 1
            if connected == 0:
                self.logger("[%s] src_db connect failed" % self.name, 'error')
                return False

            self.logger("[%s] db connect successfully" % self.name, 'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ("ExtractionEtl DbConnect", str(e)),
                        'error')
            self.logger(
                "[%s] %s" % ("ExtractionEtl DbConnect", Etltraceback()),
                'error')
            return False
Exemple #4
0
    def DbExecSql(self, etl_op, db_object_dict):
        """Assemble the extraction SQL and execute it on ``self.db_obj``.

        The assembler named by ``etl_op['sql_assemble']`` is taken from
        ``EtlUtility`` when defined there, otherwise from a module of the
        same name under ``EtlUDF``. It is called with ``(etl_op,
        self.logger)`` and its result is executed with the output file path
        passed as ``outfile``.

        Args:
            etl_op: step configuration dict; reads ``sql_assemble``.
            db_object_dict: not used by this method.

        Returns:
            True if ``self.db_obj.execute`` reports success; False if it
            reports failure or if any exception is raised (logged, not
            propagated).
        """
        try:
            self.logger("[%s] db execsql started" % self.name, 'info')

            assembler_name = etl_op['sql_assemble']
            if assembler_name in EtlUtility.__dict__:
                assemble = EtlUtility.__dict__[assembler_name]
            else:
                # NOTE(review): the slash-separated module name is kept as
                # in the original; confirm it resolves on the interpreter in
                # use before changing it to a dotted name.
                udf = __import__('EtlUDF/%s' % assembler_name)
                assemble = udf.__dict__[assembler_name]
            sql = assemble(etl_op, self.logger)

            self.logger("[%s] sql is [%s]" % (self.name, sql), 'info')

            if not self.db_obj.execute(
                    sql,
                    officialsql=True,
                    outfile=self.outfiledir + '/' + self.outfilename):
                self.logger("[%s] db execsql failed" % self.name, 'error')
                return False

            self.logger("[%s] db execsql successfully" % self.name, 'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ("ExtractionEtl DbExecSql", str(e)),
                        'error')
            self.logger(
                "[%s] %s" % ("ExtractionEtl DbExecSql", Etltraceback()),
                'error')
            return False
Exemple #5
0
 def GetSqlRes(self,etl_op,db_object_dict):
     """Placeholder step that always reports failure.

     Logs a start message followed by a failure message; neither argument
     is used.

     Returns:
         False in every case.
     """
     try:
         self.logger("[%s] db getsqlres started" % self.name,'info')

         self.logger("[%s] db getsqlres failed" % self.name,'error')
         return False
     except Exception as e:
         self.logger("[%s] %s" % ("LoadingEtl GetSqlRes",str(e)),'error')
         self.logger("[%s] %s" % ("LoadingEtl GetSqlRes",Etltraceback()),'error')
         return False
Exemple #6
0
 def DbExecSql(self,etl_op,db_object_dict):
     """Placeholder step that executes nothing and reports success.

     Logs a start message followed by a success message; neither argument
     is used.

     Returns:
         True, or False if logging itself raises (the exception is logged,
         not propagated).
     """
     try:
         self.logger("[%s] db execsql started" % self.name,'info')

         self.logger("[%s] db execsql successfully" % self.name,'info')
         return True
     except Exception as e:
         self.logger("[%s] %s" % ("LoadingEtl DbExecSql",str(e)),'error')
         self.logger("[%s] %s" % ("LoadingEtl DbExecSql",Etltraceback()),'error')
         return False
Exemple #7
0
    def LoadRes(self, etl_op, db_object_dict):
        """Placeholder step that loads nothing and reports success.

        Logs two informational messages; neither argument is used.

        Returns:
            True, or False if logging itself raises (the exception is
            logged, not propagated).
        """
        try:
            self.logger("[%s] db LoadRes started" % self.name, 'info')

            self.logger("[%s] db LoadRes exec started" % self.name, 'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ("ExtractionEtl LoadRes", str(e)), 'error')
            self.logger("[%s] %s" % ("ExtractionEtl LoadRes", Etltraceback()),
                        'error')
            return False
Exemple #8
0
 def DbPreSql(self,etl_op,db_object_dict):
     """Run every pre-load statement on ``self.db_obj``, stopping on failure.

     Args:
         etl_op: step configuration dict; ``pre_sql`` is iterated and each
             item is passed to ``self.db_obj.execute(..., presql=True)``.
         db_object_dict: not used by this method.

     Returns:
         True if every statement succeeds (including when ``pre_sql`` is
         empty); False at the first statement that fails or if any
         exception is raised (logged, not propagated).
     """
     try:
         self.logger("[%s] db presql exec started" % self.name,'info')

         for statement in etl_op["pre_sql"]:
             self.logger("[%s] db presql [%s]" % (self.name,statement),'info')
             if not self.db_obj.execute(statement,presql=True):
                 self.logger("[%s] db presql [%s] failed" % (self.name,statement),'error')
                 return False

         self.logger("[%s] db presql exec successfully" % self.name,'info')
         return True
     except Exception as e:
         self.logger("[%s] %s" % ("LoadingEtl DbPreSql",str(e)),'error')
         self.logger("[%s] %s" % ("LoadingEtl DbPreSql",Etltraceback()),'error')
         return False
Exemple #9
0
 def run(self):
     """Run the loading pipeline and close the database object afterwards.

     The steps DbConnect, DbPreSql, DbExecSql, LoadRes and DbPostSql are
     called in that order with ``(self.etl_op, self.db_object_dict)``; the
     first one returning a false value stops the chain.

     Returns:
         True if every step succeeds and the database object closes
         cleanly; False if a step fails or if any exception is raised
         (logged, not propagated).
     """
     try:
         self.logger("[%s] etl started<-><-><-><-><->" % self.name,'info')
         # `and` short-circuits, so later steps are skipped after a failure.
         succeeded = (self.DbConnect(self.etl_op,self.db_object_dict)
                      and self.DbPreSql(self.etl_op,self.db_object_dict)
                      and self.DbExecSql(self.etl_op,self.db_object_dict)
                      and self.LoadRes(self.etl_op,self.db_object_dict)
                      and self.DbPostSql(self.etl_op,self.db_object_dict))
         if succeeded:
             self.logger("[%s] etl successfully #############" % self.name,'info')
             self.db_obj.close()
             return True

         self.logger("[%s] etl failed" % self.name,'error')
         # A failed step may have left no database object behind; skip the
         # close then, so the real failure is not buried under a spurious
         # AttributeError traceback.
         db_obj = getattr(self,'db_obj',None)
         if db_obj is not None:
             db_obj.close()
         return False
     except Exception as e:
         self.logger("[%s] %s" % ("LoadingEtl run",str(e)),'error')
         self.logger("[%s] %s" % ("LoadingEtl run",Etltraceback()),'error')
         return False
Exemple #10
0
    def run(self):
        """Run the transform step and report the outcome.

        Calls ``self.Transform(self.etl_op, '')`` and logs success or
        failure according to its result.

        Returns:
            True if ``Transform`` returns a true value; False otherwise or
            if any exception is raised (logged, not propagated).
        """
        try:
            self.logger("[%s] TransformEtl started<-><-><-><-><->" % self.name,
                        'info')

            if not self.Transform(self.etl_op, ''):
                self.logger("[%s] TransformEtl failed" % self.name, 'error')
                return False

            self.logger(
                "[%s] TransformEtl successfully #############" % self.name,
                'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ("TransformEtl run", str(e)), 'error')
            self.logger("[%s] %s" % ("TransformEtl run", Etltraceback()),
                        'error')
            return False
Exemple #11
0
    def DbPostSql(self, etl_op, db_object_dict):
        """Run every post-extraction statement, stopping on failure.

        Args:
            etl_op: step configuration dict; ``post_sql`` is iterated and
                each item is passed to
                ``self.db_obj.execute(..., postsql=True)``.
            db_object_dict: not used by this method.

        Returns:
            True if every statement succeeds (including when ``post_sql``
            is empty); False at the first statement that fails or if any
            exception is raised (logged, not propagated).
        """
        try:
            self.logger("[%s] DbPostSql started" % self.name, 'info')

            for statement in etl_op["post_sql"]:
                self.logger("[%s] db postsql [%s]" % (self.name, statement),
                            'info')
                # The connection lives on self.db_obj (the attribute
                # DbConnect assigns); the former self.src_db reference is
                # presumably a leftover from an earlier naming -- confirm.
                if not self.db_obj.execute(statement, postsql=True):
                    self.logger(
                        "[%s] postsql is [%s] failed" % (self.name, statement),
                        'error')
                    return False

            self.logger("[%s] DbPostSql successfully" % self.name, 'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ('ExtractionEtl DbPostSql', str(e)),
                        'error')
            self.logger(
                "[%s] %s" % ("ExtractionEtl DbPostSql", Etltraceback()),
                'error')
            return False
Exemple #12
0
    def Transform(self, etl_op, db_object_dict):
        """Run the configured transform over the step's source files.

        Builds a mapping of source file path to its ``data_field`` value,
        then calls the transform named by ``etl_op['data_transform_type']``
        with ``(source_files, output_path, self.logger, args)``, where
        ``args`` is a deep copy of ``etl_op``. The transform is taken from
        ``EtlUtility`` when defined there, otherwise from a module of the
        same name under ``EtlUDF``.

        Args:
            etl_op: step configuration dict; reads ``data_source``,
                ``data_transform_type`` and, for sources without a
                ``path``, ``job_name``, ``date`` and ``hour``.
            db_object_dict: not used by this method.

        Returns:
            True if the transform returns a true value; False otherwise or
            if any exception is raised (logged, not propagated).
        """
        try:
            self.logger("[%s] Transform started" % self.name, 'info')

            source_files = {}
            for source in etl_op['data_source']:
                if source.get('path', '').strip():
                    source_path = source['path']
                else:
                    # No explicit path: derive the file name of an earlier
                    # step's output from the job/step names and the date.
                    source_path = '%s/%s_%s_%s_%s_%s' % (
                        self.outfiledir, self.conf_name,
                        source.get('job_name', etl_op['job_name']),
                        source['step_name'], etl_op['date'], etl_op['hour'])
                source_files[source_path] = source['data_field']

            args = copy.deepcopy(etl_op)

            transform_name = etl_op['data_transform_type']
            if transform_name in EtlUtility.__dict__:
                transform = EtlUtility.__dict__[transform_name]
            else:
                # NOTE(review): the slash-separated module name is kept as
                # in the original; confirm it resolves on the interpreter in
                # use before changing it to a dotted name.
                udf = __import__('EtlUDF/%s' % transform_name)
                transform = udf.__dict__[transform_name]

            if not transform(source_files,
                             self.outfiledir + '/' + self.outfilename,
                             self.logger, args):
                self.logger("[%s] Transform failed" % self.name, 'error')
                return False

            self.logger("[%s] Transform successfully" % self.name, 'info')
            return True
        except Exception as e:
            self.logger("[%s] %s" % ("TransformEtl Transform", str(e)),
                        'error')
            self.logger("[%s] %s" % ("TransformEtl Transform", Etltraceback()),
                        'error')
            return False