Example 1
def chooseItems(mode='random', nItems=10):
    """query db for produceable stuff and applies whatever
    filter, return list of typeIDs"""
    # in SQL, single quotes delimit string literals; double quotes are identifiers
    command = (f'SELECT "typeID", "manufSize" '
               f'FROM "BlueprintPriority" '
               f'WHERE "priority" = \'manufacturing\' '
               f'AND "lowPriority" = 0')

    typeIDs = utils.dbQuery(utils.currentDb, command, fetchAll=True)
    typeIDs = dict(typeIDs)

    if mode == 'random':
        # dict views are not sequences, so materialize the keys before sampling
        chosen = random.sample(list(typeIDs), nItems)

        return {typeID: typeIDs[typeID] for typeID in chosen}
    elif mode == 'market':
        profits = market.itemProfits(typeIDs)
        sortedProfits = sorted(profits.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
        totalProjectedProfits = utils.millify(
            sum(x[1] for x in sortedProfits[0:nItems]))
        print(f"expected total profit: {totalProjectedProfits}")

        return {
            typeID[0]: typeIDs[typeID[0]]
            for typeID in sortedProfits[0:nItems]
        }
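
A minimal usage sketch (assuming the random, operator, utils and market modules this snippet relies on are importable and the databases are populated):

    # pick the ten projected-most-profitable buildable items
    targets = chooseItems(mode='market', nItems=10)
    for typeID, manufSize in targets.items():
        print(typeID, manufSize)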
Example 2
def getManufacturingIndex(systemName='Ashab'):
    """"""
    command = (f'SELECT "manufacturing" '
               f'FROM sysIndices '
               f'WHERE systemName == "{systemName}"')

    index = utils.dbQuery(utils.currentDb, command)

    return index
Example 3
def getAdjustedPrice(typeID):
    """get adjusted price from the database"""
    command = (f'SELECT "adjPrice" '
               f'FROM adjPrices '
               f'WHERE typeID = {typeID}')

    adjPrice = utils.dbQuery(utils.currentDb, command)

    return adjPrice
Example 4
def _getCachedAvgPrice(typeID):
    """query the current database for avg prices"""
    command = (f'SELECT "avgPrice", "date" '
               f'FROM "avgPrices" '
               f'WHERE "typeID" = {typeID}')

    cachedPrice = utils.dbQuery(utils.currentDb, command)

    if cachedPrice:
        return cachedPrice
    else:
        return [None, None]
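
A hypothetical wrapper that treats the cached row as stale after a day; the ISO date format and the fetchAvgPriceFromESI refresh helper are assumptions, not part of the snippet:

    from datetime import datetime, timedelta

    def getAvgPrice(typeID):
        avgPrice, date = _getCachedAvgPrice(typeID)
        # assumes "date" is stored as an ISO-8601 string
        if date and datetime.now() - datetime.fromisoformat(date) < timedelta(days=1):
            return avgPrice
        return fetchAvgPriceFromESI(typeID)  # hypothetical refresh path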
Example 5
def checkMissing(config_list):
    print_hdr = "[datasync_quality_missing: checkMissing] - "
    print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "Entered")

    conn_metadata = None
    try:

        conn_metadata, cur_metadata = dbConnect(
            config_list['meta_db_dbName'], config_list['meta_db_dbUser'],
            config_list['meta_db_dbUrl'],
            base64.b64decode(config_list['meta_db_dbPwd']))

        check_sql = "select c.id, c.data_path, c.load_type,c.source_schemaname||'.'||c.source_tablename as source_table, c.target_schemaname||'.'||c.target_tablename as target_table, " \
                            "c.system_name, c.hvr_source_schemaname, to_char(l.last_success_run_time,'YYYY-MM-DD HH24:MI:SS') as last_success_run_time, " \
                            "to_char(q.last_count_run_time,'YYYY-MM-DD HH24:MI:SS') as last_count_run_time, to_char(c.last_run_time,'YYYY-MM-DD HH24:MI:SS') as last_control_run_time " \
                    "from sync.control_table c " \
                    "left outer join (select data_path,load_type, target_tablename,max(run_time) as last_count_run_time from sbdt.datasync_quality group by data_path,load_type,target_tablename) q " \
                    "on q.data_path = c.data_path " \
                            "AND q.load_type = c.load_type " \
                            "AND q.target_tablename = c.target_schemaname||'.'||c.target_tablename " \
                    "left outer join (select data_path, table_name, max(log_time) as last_success_run_time from sbdt.edl_log where plant_name = 'DATASYNC' and data_path in ('SRC2Hive','Talend2Hive','KFK2Hive','SQOOP2Hive') and status = 'Job Finished' group by data_path, table_name) l " \
                    "on l.data_path = c.data_path " \
                            "AND l.table_name = c.target_schemaname||'.'||c.target_tablename " \
                    "where 1 = 1 " \
                            "AND c.data_path in ('SRC2Hive','Talend2Hive','KFK2Hive','SQOOP2Hive') " \
                            "AND c.source_schemaname not in ('ftp') " \
                            "AND (c.system_name is null or c.system_name not in ('externaldata')) " \
                            "AND c.status_flag = 'Y' " \
                            "AND (c.custom_sql is NULL OR trim(c.custom_sql) = '') " \
                            "AND ((q.last_count_run_time is null) or (l.last_success_run_time is not null and q.last_count_run_time < l.last_success_run_time - interval '1 day')) " \
                    "order by last_success_run_time desc nulls last"

        print(check_sql)

        check_results = dbQuery(cur_metadata, check_sql)
        if len(check_results) > 0:
            mail_subject = "ATTENTION: Datasync Quality: Missing Count Validation"
            sendMailHTML(config_list['email_dataQualityReceivers'],
                         mail_subject, formatMissingMail(check_results))

    except Exception as e:
        mail_subject = "ERROR: Datasync Quality Missing"
        output_msg = "ERROR: Encountered error while running job" + "\n" + traceback.format_exc(
        )
        print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr +
              output_msg)
        sendMailHTML(config_list['email_dataQualityReceivers'], mail_subject,
                     output_msg)
        sys.exit(0)
    finally:
        if conn_metadata is not None and not conn_metadata.closed:
            conn_metadata.close()
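
checkMissing expects the same config_list dict that load_config() builds in the count job later on this page; a plausible entry point, assuming that loader:

    if __name__ == '__main__':
        checkMissing(load_config())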
Example 6
    def get_waiting_job(self, lock_dlist):
        print_hdr = "[" + self.class_name + ": get_waiting_job] - "
        conn_metadata = None

        try:
            table_list = []
            results = []
            for lock_dict in lock_dlist:
                if lock_dict['table_name'].find('=') == -1:
                    table_list.append(lock_dict['table_name'])
            table_distinct_list = list(set(table_list))

            if table_distinct_list:
                table_filter_clause = ",".join("'" + l + "'" for l in table_distinct_list)

                conn_metadata, cur_metadata = dbConnect(self.config_list['meta_db_dbName'], self.config_list['meta_db_dbUser'],
                                                        self.config_list['meta_db_dbUrl'], base64.b64decode(self.config_list['meta_db_dbPwd']))

                log_sql = "select job_key,table_name,to_char(max_start_time,'YYYY-MM-DD HH24:MI:SS') as start_time \
from ( \
    select plant_name ||' : '|| data_path||' : '||job_name||' : '||load_id||' : '||run_id as job_key,table_name,status,log_time, \
        max(log_time) over (partition by table_name) as max_start_time \
    from sbdt.edl_log \
    where 1 = 1 \
        and log_time > (current_timestamp - INTERVAL '1 day') \
        and plant_name not in ('TRANSPORTATION') \
        and (upper(data_path) not like '%2GP' or upper(data_path) not like '%2RDS' or upper(data_path) not like '%2PREDIX') \
        and table_name in (" + table_filter_clause + ") \
        and table_name is not null and length(trim(table_name)) > 0 and table_name <> 'NA') T1 \
where 1 = 1 \
        and log_time = max_start_time \
        and upper(status) like '%START%'"

                print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "log_sql: " + log_sql)

                results = dbQuery(cur_metadata, log_sql)
                print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "results: ", results)

            for lock_dict in lock_dlist:
                if len(results) > 0:
                    for result in results:
                        if (result['table_name'] == lock_dict['table_name']) and (datetime.strptime(result['start_time'], '%Y-%m-%d %H:%M:%S') >= \
                                (datetime.strptime(lock_dict['lock_datetime'], '%Y-%m-%d %H:%M:%S') - timedelta(minutes=30))):
                            lock_dict['waiting_job'] = result['job_key']
                            lock_dict['waiting_job_start_time'] = result['start_time']

        except Exception as e:
            print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "ERROR details: " + traceback.format_exc())
        finally:
            if conn_metadata is not None and not conn_metadata.closed:
                conn_metadata.close()
            # returning from finally deliberately swallows any exception raised
            # above, so callers always get the (possibly annotated) list back
            return lock_dlist
Example 7
def baseMaterials(typeID):
    """calculate the manufacturing cost of an item"""
    typeID = int(typeID)
    returnDict = {}
    command = (f'SELECT "materialTypeID", "quantity" '
               f'FROM "industryActivityMaterials" '
               f'WHERE "TypeID" = {typeID} '
               f'AND "activityID" = 1')

    materials = utils.dbQuery(utils.staticDb, command, fetchAll=True)

    if not materials:
        return None

    for materialTypeID, quantity in materials:
        returnDict[materialTypeID] = quantity

    return returnDict
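
These helpers compose: in EVE's industry model the job installation fee is roughly the material adjusted-price total multiplied by the system cost index. A sketch, assuming each single-row dbQuery result unwraps to its scalar via [0]:

    def jobInstallationFee(typeID, systemName='Ashab'):
        materials = baseMaterials(typeID) or {}
        estimatedItemValue = sum(getAdjustedPrice(matID)[0] * qty
                                 for matID, qty in materials.items())
        return estimatedItemValue * getManufacturingIndex(systemName)[0]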
Example 8
    def fs2hdfs(self):
        self.technology = 'Python'
        self.system_name = 'HDFS'
        self.job_name = 'FS-->HDFS'
        t = datetime.fromtimestamp(time.time())
        v_timestamp = str(t.strftime('%Y-%m-%d %H:%M:%S'))
        tablename = self.target_schemaname + "." + self.target_tablename

        try:
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
        except psycopg2.Error as e:
            error = 1
            err_msg = "Error connecting to control table database"
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            return error, err_msg, output_msg

        try:
            run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
            run_id_lists = dbQuery(cur_metadata, run_id_sql)
            run_id_list = run_id_lists[0]
            run_id = run_id_list['nextval']
            print "Run ID for the table", tablename, " is : ", run_id
        except Exception as e:
            print e
            error = 2
            err_msg = "Error while getting Run ID"
            status = "Job Error"
            output_msg = traceback.format_exc()
            audit_logging(cur_metadata, self.load_id, 0, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
        status = 'Job Started'
        error = 0
        err_msg = ''
        output_msg = ''
        audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                      self.system_name, self.job_name, tablename, status,
                      self.data_path, self.technology, 0, 0, 0, error, err_msg,
                      0, 0, output_msg)

        if len(self.source_schemaname) > 0 and len(self.source_tablename) > 0:
            local_file_name = self.source_schemaname + self.source_tablename
        elif len(self.source_schemaname) > 0 and len(
                self.source_tablename) == 0:
            local_file_name = self.source_schemaname
        elif len(self.source_schemaname) == 0 and len(
                self.source_tablename) > 0:
            local_file_name = self.source_tablename
        else:
            error = 2
            err_msg = "No source to run this program"
            output_msg = "No source to run this program"
            status = 'Job Error'
            print(err_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename
        print(local_file_name)
        try:
            files = glob.glob(local_file_name)
            if len(files) == 0:
                error = 3
                err_msg = "No data found"
                output = "No data found"
                print(err_msg)
                return error, err_msg, tablename
            else:
                self.target_path = self.hive_warehouse_path + "/" + self.target_schemaname + ".db/" + self.target_tablename + "/"
                (ret, out, err) = run_cmd(
                    ['hadoop', 'fs', '-rm', '-r', (self.target_path + "*")])
                if ret:
                    if err.find("No such file or directory") <> -1:
                        (ret, out, err) = run_cmd(
                            ['hadoop', 'fs', '-mkdir', self.target_path])
                        if ret:
                            pass
                    else:
                        error = 4
                        err_msg = "Error in cleaning in target path"
                        output = traceback.format_exc()
                        return error, err_msg, tablename
        except Exception as e:
            error = 5
            err_msg = "Error while checking the local file path or cleaning the target location in HDFS"
            output = traceback.format_exc()
            status = 'Job Error'
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        try:
            files = glob.glob(local_file_name)
            for file in files:
                (ret, out, err) = run_cmd(
                    ['hadoop', 'fs', '-copyFromLocal', file, self.target_path])
                if ret > 0:
                    error = 5
                    err_msg = "Error in ingesting into HDFS"
                    output = traceback.format_exc()
                    return error, err_msg, tablename
        except Exception as e:
            error = 6
            err_msg = "Error while loading data into HDFS"
            output = traceback.format_exc()
            status = 'Job Error'
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        try:
            update_control_info_sql = "UPDATE sync.control_table set last_run_time = '" + v_timestamp + "' where id = " + str(
                self.id
            ) + " AND target_schemaname = '" + self.target_schemaname + "' AND target_tablename = '" + self.target_tablename + "' AND data_path = '" + self.data_path + "'"
            print(update_control_info_sql)
            cur_metadata.execute(update_control_info_sql)
        except psycopg2.Error as e:
            print(e)
            error = 7
            err_msg = "Error while updating the control table"
            print(err_msg)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        # Final log entry
        try:
            error = 0
            err_msg = 'No Errors'
            status = 'Job Finished'
            output_msg = 'Job Finished successfully'
            print(output_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)

        except psycopg2.Error as e:
            error = 15
            err_msg = "Error while writing the final log entry"
            print(err_msg)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename
        finally:
            conn_metadata.close()
        return error, err_msg, tablename
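
The run_cmd helper these jobs lean on is not shown anywhere on this page; a plausible minimal version returning the (ret, out, err) triple the callers expect, offered as an assumption rather than the original:

    import subprocess

    def run_cmd(args_list):
        """Run a command and return (returncode, stdout, stderr) as text."""
        proc = subprocess.Popen(args_list,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = proc.communicate()
        return proc.returncode, out, err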
Example 9
    def fs2hdfs_hive_log(self):
        hosts = []
        # Get information about the table to load
        conn_metadata = None
        try:
            metadata_sql = "SELECT * FROM sync.control_table \
                        WHERE target_tablename = 'hive_log_ext' \
                            AND target_schemaname = 'default' \
                            AND data_path = 'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "before connecting to metastore controls")
            controls = dbQuery(cur_metadata, metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            err_msg = "Error connecting to control table database"
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            return output_msg
        finally:
            if conn_metadata is not None:
                conn_metadata.close()

        if not controls:
            error = 3
            err_msg = "No Entry found in control table"
            status = 'Job Error'
            output_msg = "No Entry found in control table"
            return output_msg

        self.id = str(controls[0]['id'])
        self.source_schema = str(controls[0]['source_schemaname'])
        self.source_tablename = str(controls[0]['source_tablename'])
        self.target_schema = str(controls[0]['target_schemaname'])
        self.target_tablename = str(controls[0]['target_tablename'])
        partitioned = controls[0]['is_partitioned']
        self.load_type = str(controls[0]['load_type'])
        self.s3_backed = controls[0]['s3_backed']
        first_partitioned_column = str(controls[0]['first_partitioned_column'])
        second_partitioned_column = str(
            controls[0]['second_partitioned_column'])
        partitioned_column_transformation = str(
            controls[0]['partition_column_transformation'])
        custom_sql = str(controls[0]['custom_sql'])
        self.join_columns = str(controls[0]['join_columns'])
        self.archived_enabled = controls[0]['archived_enabled']
        distribution_columns = str(controls[0]['distribution_columns'])
        dist_col_transformation = str(controls[0]['dist_col_transformation'])
        self.log_mode = str(controls[0]['log_mode'])
        self.last_run_time = str(controls[0]['last_run_time'])

        incoming_path = self.paths + "/hiveserver2.log"
        local_inprogress_path = self.local_staging_path + "/in_progress/"
        inprogress_path = self.staging_path + self.target_schema + "/" + self.target_tablename + "/in_progress/"
        hosts = self.hive_hosts.split(',')
        print(hosts)
        # Creating the Local in_progress and/or clearing that location for new incoming files
        for host in hosts:
            print("Inside Host path check")
            path_to_check = self.local_staging_path + host
            print(path_to_check)
            path_check = glob.glob(path_to_check)
            print(path_check)
            if len(path_check) > 0:
                print("Path exists... Clearing the directory")
                (ret, out, err) = run_cmd(['rm', '-rf', (path_to_check)])
                print(ret, out, err)
                if ret:
                    error = 1
                    err_msg = "Error while cleaning in_progress location in Local FS"
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                          err)
                    status = 'Job Error'
                    output_msg = traceback.format_exc()
                    print(output_msg)
                    sys.exit(error)

            (ret, out, err) = run_cmd(['mkdir', '-p', path_to_check])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in Local FS"
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(output_msg)
                sys.exit(error)

        path_check = glob.glob(local_inprogress_path)
        if len(path_check) > 0:
            print("Path exists... Clearing the directory")
            (ret, out, err) = run_cmd(['rm', '-rf', local_inprogress_path])
            print(ret, out, err)
            if ret:
                error = 1
                err_msg = "Error while cleaning in_progress location in Local FS"
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(output_msg)
                sys.exit(error)
        (ret, out, err) = run_cmd(['mkdir', '-p', local_inprogress_path])
        if ret:
            error = 1
            err_msg = "Error while creating in_progress location in Local FS"
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            sys.exit(error)

        # Creating the HDFS in_progress location and/or clearing that location for new incoming files
        (ret, out, err) = run_cmd(["hadoop", "fs", "-test", "-e", inprogress_path])
        if ret:
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "Directory does not exist ... Creating...")
            (ret, out, err) = run_cmd(["hadoop", "fs", "-mkdir", "-p", inprogress_path])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in HDFS"
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(output_msg)
                sys.exit(error)
        # else:
        #     (ret, out, err) = run_cmd(["hadoop", "fs", "-rm", "-r", inprogress_path + "*"])
        #     if ret:
        #         if err.find("No such file or directory") <> -1:
        #             pass
        #         else:
        #             error = 1
        #             err_msg = "Error while cleaning in_progress location in HDFS".format(error)
        #             print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        #             status = 'Job Error'
        #             output_msg = traceback.format_exc()
        #             print output_msg
        #             return output_msg

        # Checking the last run time of the table.
        # Bringing the files from each host since the last run time
        from datetime import date, timedelta
        if self.last_run_time == 'None':
            self.last_run_time = str(datetime.now())
        print("Last Run Time : ", self.last_run_time)
        lr_dt, lr_ts = self.last_run_time.split()
        lr_dt = datetime.strptime(lr_dt, "%Y-%m-%d").date()
        today = datetime.now().date()
        delta = today - lr_dt
        # hosts = self.hive_hosts.split(',')
        print(hosts)
        for host in hosts:
            (ret, out, err) = run_cmd([
                'scp', ('hdp@' + host + ':' + incoming_path),
                (self.local_staging_path + host + "/")
            ])
            print(ret, out, err)
            if ret > 0:
                error = 1
                err_msg = "Error while moving Current Log File to Local in_progress location"
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(err_msg, output_msg)
                sys.exit(error)
            for i in range(delta.days):
                dt = (lr_dt + timedelta(days=i))
                dtstr = dt.isoformat()
                print(dtstr)
                (ret, out, err) = run_cmd([
                    'scp',
                    ('hdp@' + host + ':' + incoming_path + '.' + dtstr + '*'),
                    (self.local_staging_path + host + "/")
                ])
                print(ret, out, err)
                if ret > 0:
                    if err.find('No such file or directory') != -1:
                        pass
                    else:
                        error = 1
                        err_msg = "Error while moving data to in_progress location"
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print(output_msg)
                        sys.exit(error)

        # Unzipping the files if there are any zipped files
        for host in hosts:
            files = glob.glob((self.local_staging_path + host + "/*"))
            for file in files:
                if file.find(".gz") != -1:
                    try:
                        with gzip.open(file, 'rb') as f_in:
                            with open((file.replace('.gz', '_') + host),
                                      'wb') as f_out:
                                shutil.copyfileobj(f_in, f_out)
                    except Exception as e:
                        error = 4
                        err_msg = "Error while unzipping file in Local FS"
                        output_msg = traceback.format_exc()
                        print(err_msg, output_msg)
                        sys.exit(error)
                    #(ret,out,err)       = run_cmd(['gunzip', '-c', file, ' > ','test')])
                    # (ret, out, err) = run_cmd(['gunzip', file])
                    #(ret, out, err) = run_cmd(['zcat',  file, '>', (file.replace('.gz', '_') + host)])
                    # if ret > 0:
                    #     error = 1
                    #     err_msg = "Error while unzipping file in Local FS".format(error)
                    #     print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    #     status = 'Job Error'
                    #     output_msg = traceback.format_exc()
                    #     print err_msg, output_msg
                    #     return output_msg
                    (ret, out, err) = run_cmd(['rm', '-f', file])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while removing zipped file in Local FS"
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print(err_msg, output_msg)
                        sys.exit(error)
                else:
                    (ret, out, err) = run_cmd(['mv', file, (file + '_' + host)])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while renaming file in Local FS"
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print(err_msg, output_msg)
                        sys.exit(error)

            # Moving the final set of files to the in_progress location to send it to HDFS
            # NOTE: move_files' return value is not captured, so this ret check
            # still sees the exit code of the previous run_cmd call
            move_files((self.local_staging_path + host + "/*"),
                       local_inprogress_path)
            if ret > 0:
                error = 1
                err_msg = "Error while moving files to in_progress location in Local FS"
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print(err_msg, output_msg)
                sys.exit(error)


        # Ingesting to HDFS
        (ret, out, err) = run_cmd([
            'hadoop', 'distcp', '-overwrite',
            'file:///' + (local_inprogress_path + "/*"),
            'hdfs:///' + inprogress_path
        ])
        if ret > 0:
            error = 1
            err_msg = "Error while moving files to HDFS from Local in_progress path"
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(err_msg, output_msg)
            sys.exit(error)

        try:
            metadata_sql = "UPDATE sync.control_table SET last_run_time = now() \
                        WHERE target_tablename = 'hive_log' \
                            AND target_schemaname = 'default' \
                            AND data_path = 'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            cur_metadata.execute(metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            err_msg = "Error connecting to control table database"
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            sys.exit(error)
        finally:
            conn_metadata.close()
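
move_files is likewise an external helper; a minimal stand-in consistent with how it is called above (a source glob and a destination directory), again an assumption:

    import glob
    import shutil

    def move_files(src_glob, dest_dir):
        """Move every file matching src_glob into dest_dir."""
        for path in glob.glob(src_glob):
            shutil.move(path, dest_dir)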
Example 10
        elif data_path.find("GP2HDFS") <> -1:
            metadata_sql = metadata_sql + " AND source_schemaname = '" + input_schema + "'" \
                           + " AND (hvr_last_processed_value > last_run_time OR last_run_time IS NULL)"
        else:
            metadata_sql = metadata_sql + " AND source_schemaname = '" + input_schema + "'"
        if input_tablename_list is not None:
            if data_path.find("MIR2") <> -1:
                metadata_sql = metadata_sql + " AND target_tablename in (" + tablename_filter + ")"
            else:
                metadata_sql = metadata_sql + " AND source_tablename in (" + tablename_filter + ")"
        metadata_sql = metadata_sql + " AND load_type = '" + load_type + "'"

        print(print_hdr + "metadata_sql: " + metadata_sql)

        load_id_sql = "select nextval('sbdt.edl_load_id_seq')"
        controls = dbQuery(cur_metadata, metadata_sql)
        print(print_hdr + "controls: ", controls)
        load_id_lists = dbQuery(cur_metadata, load_id_sql)
        load_id_list = load_id_lists[0]
        load_id = load_id_list['nextval']
        print(print_hdr + "load_id: " + str(load_id))

        input = []
        if len(controls) > 0:
            for control in controls:
                # Special logic to mirror table from one schema in GP to a different schema in HIVE
                if data_path.find("GP2HDFS") <> -1 and control[
                        'source_schemaname'] <> control[
                            'target_schemaname'] and not is_special_logic:
                    error = 2
                    err_msg = "datasync_driver: main[{0}]: ERROR: Mirror loading between different schemas is not allowed: " \
Example 11
def count(schemaname, loadtype):

    config_list = load_config()
    metastore_dbName = config_list['meta_db_dbName']
    dbmeta_Url = config_list['meta_db_dbUrl']
    dbmeta_User = config_list['meta_db_dbUser']
    dbmeta_Pwd = base64.b64decode(config_list['meta_db_dbPwd'])

    dbtgt_host = config_list['src_db_hive_dbUrl']
    dbtgt_host2 = config_list['src_db_hive_dbUrl2']

    dbtgt_Port = config_list['src_db_hive_dataPort']
    dbtgt_Auth = config_list['src_db_hive_authMech']

    src_dbName = config_list['src_db_gp_dbName']
    dbsrc_Url = config_list['src_db_gp_dbUrl']
    dbsrc_User = config_list['src_db_gp_dbUser']
    dbsrc_Pwd = base64.b64decode(config_list['src_db_gp_dbPwd'])

    emailSender = config_list['email_sender']
    emailReceiver = config_list['email_receivers']

    t = datetime.fromtimestamp(time.time())
    v_timestamp = str(t.strftime('%Y-%m-%d %H:%M:%S'))

    input_source_schema = schemaname
    load_type = loadtype
    print(input_source_schema)

    # try:
    #     count = 0
    #     for pid in psutil.pids():
    #         p = psutil.Process(pid)
    #         if p.name() == "python2.7" and  p.cmdline()[2] == input_source_schema:
    #             print p.name(), p.cmdline()[1], p.cmdline()[2]
    #             count = count +1
    # except Exception as e:
    #     print e
    #     return
    # print count
    # if count > 0:
    #     err_msg = "Exiting Count program as Loads are running . . ."
    #     print err_msg
    #     load_id = "None"
    #     error_table_list = input_source_schema
    #     sendMail(emailSender,emailReceiver,err_msg,error_table_list,load_id)
    #     return
    # else:

    try:
        conn_metadata, cur_metadata = txn_dbConnect(metastore_dbName,
                                                    dbmeta_User, dbmeta_Url,
                                                    dbmeta_Pwd)
    except Exception as e:
        err_msg = "Error connecting to database while fetching  metadata"
        # Send Email
        print e
        return

    plant_name = "DATASYNC"
    system_name = "GPDB-Hive"
    job_name = "COUNT " + input_source_schema
    tablename = input_source_schema
    data_path = "GP2HDFS"
    technology = "Python"
    rows_inserted = 0
    rows_deleted = 0
    rows_updated = 0
    num_errors = 0
    count_sql_gpdb = ""
    count_sql_hive = ""

    load_id_sql = "select nextval('sbdt.edl_load_id_seq')"
    load_id_lists = dbQuery(cur_metadata, load_id_sql)
    load_id_list = load_id_lists[0]
    load_id = load_id_list['nextval']
    print "Load ID for this run is : ", load_id

    run_id_sql = "select nextval('sync.datasync_seq')"
    run_id_lists = dbQuery(cur_metadata, run_id_sql)
    run_id_list = run_id_lists[0]
    run_id = run_id_list['nextval']
    print "Run ID for this run is : ", run_id

    metadata_sql        = "SELECT source_schemaname||'.'||source_tablename||'-'||incremental_column as table_name "     \
                          "FROM sync.control_table  where data_path = 'GP2HDFS'  "                \
                          " and source_schemaname = '" + input_source_schema + "' AND load_type = '" + load_type + "'"
    print metadata_sql
    control = dbQuery(cur_metadata, metadata_sql)
    control_df = pd.DataFrame(control)
    control_df.columns = ['table_name']
    new_control = control_df['table_name'].tolist()

    status = 'Job Started'
    output_msg = ''
    err_msg = ''
    audit_logging(cur_metadata, load_id,run_id, plant_name, system_name, job_name, tablename,status, \
              data_path, technology,rows_inserted,rows_updated, rows_deleted, num_errors, err_msg ,0,0,output_msg)
    q = 0
    for j in new_control:
        table_name, incremental_col = j.split('-')
        gpdb_select = ("SELECT " + str(run_id) + " as run_id, COUNT(*) as COUNT, '"
                       + table_name + "' as table_name, 'GPDB' as db_name, '"
                       + v_timestamp + "' as end_date, max(" + incremental_col
                       + "::timestamp without time zone) as max_incr_col FROM "
                       + table_name + " WHERE " + incremental_col
                       + " > '1900-01-01' AND " + incremental_col + " <= '" + v_timestamp + "'")
        hive_select = ("SELECT " + str(run_id) + " as run_id, COUNT(*) as COUNT, '"
                       + table_name + "' as table_name, 'Hive' as db_name, cast('"
                       + v_timestamp + "' as timestamp) as end_date, max(hive_updated_date) as max_incr_col FROM "
                       + table_name + " WHERE hive_updated_date > '1900-01-01' AND hive_updated_date <= '" + v_timestamp + "'")
        if q < len(new_control) - 1:
            count_sql_gpdb += gpdb_select + " UNION ALL "
            count_sql_hive += hive_select + " UNION ALL "
            q = q + 1
        else:
            count_sql_gpdb += gpdb_select
            count_sql_hive += hive_select

    print "Running GPDB Count . . . . ."
    # print count_sql_gpdb

    try:
        conn_source, cur_source = dbConnect(src_dbName, dbsrc_User, dbsrc_Url,
                                            dbsrc_Pwd)
    except psycopg2.Error as e:
        err_msg = "Error connecting to source database"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        #continue
        return

    try:
        temp_table_sql = "CREATE TEMP TABLE count_" + input_source_schema + " AS " + count_sql_gpdb
        # print temp_table_sql
        cur_source.execute(temp_table_sql)
    except psycopg2.Error as e:
        print(e)
        err_msg = "Error while creating temp table in source"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        #continue
        return

    try:
        file = "/apps/staging/g00003/counts_" + input_source_schema + ".txt"
        gpdb_count_op_sql = "COPY count_" + input_source_schema + " TO STDOUT DELIMITER '|' NULL ''"
        pg_count_ip_sql = "COPY counts FROM STDIN DELIMITER '|' NULL ''"
        fo = open(file, 'w')
        cur_source.copy_expert(gpdb_count_op_sql, fo)
        fo.close()
        fi = open(file, 'r')
        cur_metadata.copy_expert(pg_count_ip_sql, fi)
        fi.close()
    except psycopg2.Error as e:
        err_msg = "Error while copying"
        print err_msg
        print e
        status = 'Job Error'
        output_msg = traceback.format_exc()
        conn_metadata.close()
        conn_source.close()
        #continue
        return
    conn_source.close()

    print "Running Hive Count. . . . . "

    try:
        conn_target, cur_target = dbConnectHive(dbtgt_host, dbtgt_Port,
                                                dbtgt_Auth)
    except Exception as e:
        try:
            conn_target, cur_target = dbConnectHive(dbtgt_host2, dbtgt_Port,
                                                    dbtgt_Auth)
        except Exception as e:
            err_msg = "Error while connecting to target database"
            status = 'Job Error'
            print(e)
            output_msg = str(e)
            audit_logging(cur_metadata, load_id, run_id, plant_name,
                          system_name, job_name, tablename, status, data_path,
                          technology, rows_inserted, rows_updated,
                          rows_deleted, num_errors, err_msg, 0, 0, output_msg)
            conn_metadata.rollback()
            conn_metadata.close()
            conn_source.close()
            return

    count_view_sql = "CREATE OR REPLACE VIEW counts_" + input_source_schema + " AS " + count_sql_hive
    # print count_view_sql
    try:
        cur_target.execute(count_view_sql)
    except Exception as e:
        print(e)
        err_msg = "Error while creating  view"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    count_query = "SELECT * FROM counts_" + input_source_schema

    try:
        cur_target.execute(count_query)
    except Exception as e:
        print(e)
        err_msg = "Error while executing count query"
        print(err_msg)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    #results = {}
    #column = 0
    #for d in cur_target.description:
    #    results[d[0]] = column
    #    column = column + 1

    columnNames = [a['columnName'] for a in cur_target.getSchema()]
    # print columnNames
    try:
        count_df = pd.DataFrame(cur_target.fetchall(), columns=columnNames)
        file = "/apps/staging/g00003/counts_" + input_source_schema + ".txt"
        f1 = open(file, 'w')
        count_df.to_csv(path_or_buf=f1, sep='\t', header=False, index=False)
        f1.close()
    except Exception as e:
        print(e)
        err_msg = "Error while writing Data Frame into file"
        print(err_msg)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    try:
        copy_sql = "COPY public.counts FROM STDIN WITH DELIMITER '\t'"
        fo = open(file)
        cur_metadata.copy_expert(copy_sql, fo)
        run_cmd([
            'rm', '-f',
            '/apps/staging/g00003/counts_' + input_source_schema + '.txt'
        ])
        err_msg = "Count completed successfully . . ."
        print(err_msg)
        error_table_list = input_source_schema
        conn_target.close()
    except Exception as e:
        print(e)
        err_msg = "Error while inserting data into final table"
        print(err_msg)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    # Final log entry
    try:
        error = 0
        err_msg = 'No Errors'
        status = 'Job Finished'
        output_msg = 'Job Finished successfully'
        print(output_msg)
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename,status, \
                      data_path, technology,rows_inserted,rows_updated, rows_deleted, num_errors, err_msg ,0,0,output_msg)
    except psycopg2.Error as e:
        error = 15
        err_msg = "Error while writing the final log entry"
        print(err_msg)
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print(output_msg)
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_target.rollback()
        conn_target.close()
        conn_metadata.close()
        return error, err_msg, tablename

    conn_metadata.commit()
    conn_metadata.close()
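
The GPDB-to-Postgres handoff above is the standard psycopg2 copy_expert round-trip through a flat file; distilled, with placeholder table and file names:

    out_sql = "COPY count_schema TO STDOUT DELIMITER '|' NULL ''"
    in_sql = "COPY counts FROM STDIN DELIMITER '|' NULL ''"
    with open('/tmp/counts.txt', 'w') as fo:
        cur_source.copy_expert(out_sql, fo)
    with open('/tmp/counts.txt') as fi:
        cur_metadata.copy_expert(in_sql, fi)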
Example 12
def fn_call(fn_name, load_id=None, run_id=None):
    config = read_config(['/apps/common/environ.properties'])
    env = config.get('branch', 'env')
    metastore_dbName = config.get(env + '.meta_db', 'dbName')
    dbmeta_Url = config.get(env + '.meta_db', 'dbUrl')
    dbmeta_User = config.get(env + '.meta_db', 'dbUser')
    dbmeta_Pwd = base64.b64decode(config.get(env + '.meta_db', 'dbPwd'))

    dbtgt_Url = config.get(env + '.tgt_db_i360', 'dbUrl')
    dbtgt_User = config.get(env + '.tgt_db_i360', 'dbUser')
    dbtgt_dbName = config.get(env + '.tgt_db_i360', 'dbName')
    dbtgt_Pwd = base64.b64decode(config.get(env + '.tgt_db_i360', 'dbPwd'))

    # Making the Job Started entry
    try:
        conn_metadata, cur_metadata = dbConnect(metastore_dbName, dbmeta_User,
                                                dbmeta_Url, dbmeta_Pwd)
        # status = 'Job Started'
        plant_name = 'GE Transportation'
        system_name = 'RDS'
        job_name = 'RDS - Trigger DB Function'
        tablename = fn_name
        # audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, '', 0, 0, '')
    except Exception as e:
        output_msg = traceback.format_exc()
        error = 1
        err_msg = "Error: Unable to generate LOAD ID"
        print(err_msg, output_msg)
        # sendMail(emailSender, emailReceiver, err_msg, tablename, load_id, env, "ERROR","DataIKU Backup", '')
        return error, err_msg


    # Generating load id if it was not supplied
    try:
        if load_id is None:
            load_id_sql = "select nextval('sbdt.edl_load_id_seq')"
            load_id_lists = dbQuery(cur_metadata, load_id_sql)
            load_id_list = load_id_lists[0]
            load_id = load_id_list['nextval']
    except Exception as e:
        output_msg = traceback.format_exc()
        error = 1
        status = 'Job Error'
        err_msg = "Error: connecting to logging database while making first audit entry"
        print(err_msg, output_msg)
        audit_logging(cur_metadata, 0, 0, plant_name, system_name, job_name,
                      tablename, status, '', 'Python', 0, 0, 0, 0, err_msg, 0,
                      0, output_msg)
        return error, err_msg

    try:
        if run_id is None:
            run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
            run_id_lists = dbQuery(cur_metadata, run_id_sql)
            run_id_list = run_id_lists[0]
            run_id = run_id_list['nextval']
    except Exception as e:
        error = 1
        err_msg = "Error: connecting to logging database while making second audit entry"
        print(err_msg)
        output_msg = traceback.format_exc()
        status = 'Job Error'
        audit_logging(cur_metadata, 0, 0, plant_name, system_name, job_name,
                      tablename, status, '', 'Python', 0, 0, 0, 0, err_msg, 0,
                      0, output_msg)
        return error, err_msg

    try:
        conn_target, cur_target = dbConnect(dbtgt_dbName, dbtgt_User,
                                            dbtgt_Url, dbtgt_Pwd)
    except Exception as e:
        error = 2
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while connecting to the Target  Database"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, '', 'Python', 0, 0, 0, 0,
                      err_msg, 0, 0, output_msg)
        return error, err_msg

    try:
        fn_name_list = fn_name.split(',')
        for fn_name in fn_name_list:
            status = 'Job Started'
            tablename = fn_name.split('(')[0]
            audit_logging(cur_metadata, load_id, run_id, plant_name,
                          system_name, job_name, tablename, status, '',
                          'Python', 0, 0, 0, 0, '', 0, 0, '')
            if fn_name.find("(") <> -1 and fn_name.find(")") <> -1:
                fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name)
                print "Running SQL : SELECT * FROM " + fn_name
            else:
                fn_result = dbQuery(cur_target,
                                    "SELECT * FROM " + fn_name + "()")
                print "Running SQL : SELECT * FROM " + fn_name + "()"
            print fn_result
            print fn_result[0][fn_name.split('(')[0].split('.')[1]]
            for notice in conn_target.notices:
                print notice
            if str(
                    fn_result[0]
                [fn_name.split('(')[0].split('.')[1]]) == 'False' or str(
                    fn_result[0][fn_name.split('(')[0].split('.')[1]]) == '1':
                print "Function returned False in the Target Database. Please check the function for more details"
                error = 4
                status = 'Job Error'
                output_msg = traceback.format_exc()
                err_msg = "Function returned False in the Target Database. Please check the function for more details"
                audit_logging(cur_metadata, load_id, run_id, plant_name,
                              system_name, job_name, tablename, status, '',
                              'Python', 0, 0, 0, 0, err_msg, 0, 0, output_msg)
                conn_metadata.close()
                conn_target.close()
                return error, err_msg
            else:
                status = 'Job Finished'
                error = 0
                err_msg = 'No Error'
                audit_logging(cur_metadata, load_id, run_id, plant_name,
                              system_name, job_name, tablename, status, '',
                              'Python', 0, 0, 0, 0, '', 0, 0, '')

    except Exception as e:
        error = 3
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while running the RDS Function"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, '', 'Python', 0, 0, 0, 0,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    # if str(fn_result[0][fn_name.split('(')[0].split('.')[1]]) == 'False' or str(fn_result[0][fn_name.split('(')[0].split('.')[1]]) == '1':
    #     print "Function returned False in the Target Database. Please check the function for more details"
    #     error = 4
    #     status = 'Job Error'
    #     output_msg = traceback.format_exc()
    #     err_msg = "Function returned False in the Target Database. Please check the function for more details"
    #     audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
    #     conn_metadata.close()
    #     conn_target.close()
    #     return error, err_msg

    # status = 'Job Finished'
    # error = 0
    # err_msg = 'No Error'
    # audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python', 0,0, 0, 0, '', 0, 0, '')
    conn_metadata.close()
    conn_target.close()
    return error, err_msg
Example 13
def fn_call(load_id,fn_name):
    config              = read_config(['/apps/common/environ.properties'])
    env                 = config.get('branch', 'env')
    metastore_dbName    = config.get(env + '.meta_db', 'dbName')
    dbmeta_Url          = config.get(env + '.meta_db', 'dbUrl')
    dbmeta_User         = config.get(env + '.meta_db', 'dbUser')
    dbmeta_Pwd          = base64.b64decode(config.get(env + '.meta_db', 'dbPwd'))

    dbtgt_Url_predix_wto    = config.get(env + '.tgt_db_predix_wto', 'dbUrl')
    dbtgt_User_predix_wto   = config.get(env + '.tgt_db_predix_wto', 'dbUser')
    dbtgt_dbName_predix_wto = config.get(env + '.tgt_db_predix_wto', 'dbName')
    dbtgt_Pwd_predix_wto    = base64.b64decode(config.get(env + '.tgt_db_predix_wto', 'dbPwd'))
    dbtgt_dbName_port_wto   = config.get(env + '.tgt_db_predix_wto', 'dbPort')

    try:
        conn_metadata, cur_metadata = dbConnect(metastore_dbName, dbmeta_User, dbmeta_Url, dbmeta_Pwd)
        run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
        run_id_lists = dbQuery(cur_metadata, run_id_sql)
        run_id_list = run_id_lists[0]
        run_id = run_id_list['nextval']
        status= 'Job Started'
        plant_name = 'GE Transportation'
        system_name = 'WTO Predix'
        job_name = 'WTO Predix - Trigger DB Function'
        tablename = 'WTO Predix'
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename,status, '', 'Python', 0, 0, 0, 0, '',0, 0, '')
    except Exception as e:
        error = 1
        err_msg = "Error: connecting to logging database while making first audit entry"
        print(err_msg)
        # sendMail(emailSender, emailReceiver, err_msg, tablename, load_id, env, "ERROR","DataIKU Backup", '')
        return error, err_msg

    try:
        conn_target, cur_target = txn_dbConnect(dbtgt_dbName_predix_wto, dbtgt_User_predix_wto, dbtgt_Url_predix_wto,dbtgt_Pwd_predix_wto, dbtgt_dbName_port_wto)

    except Exception as e:
        error = 2
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while connecting to the Target Predix Database"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
        return error, err_msg

    try:
        if fn_name.find("(") <> -1 and fn_name.find(")") <> -1:
            fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name)
        else:
            fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name + "()")
        # print fn_result
        # print fn_result[0]['proc_wto_wheel_data']
    except Exception as e:
        error = 3
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while running the Predix Function"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    if str(fn_result[0]['proc_wto_wheel_data']) == 'False' or str(fn_result[0]['proc_wto_wheel_data']) == '1':
        print("Function returned False in the Predix Database. Please check the function for more details")
        error = 4
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Function returned False in the Predix Database. Please check the function for more details"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    status = 'Job Finished'
    err_msg = ''
    audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python', 0,0, 0, 0, '', 0, 0, '')
    conn_metadata.close()
    conn_target.close()