Code example #1
 def transferMoney(self):
     cursor, connection = utils.dbConnect()
     recv = input("Enter card number to transfer money to: ")
     cursor.execute(
         f"SELECT COUNT(*) FROM cards WHERE CardNumber = '{recv}';")
     if self.luhnAlgo(recv, 0) is False:
         print("Invalid Card Number!")
     elif recv == self.cardNo:
         print("You can't transfer money to yourself!")
     elif cursor.fetchone()[0] == 0:
         print("No such card exists!")
     else:
         cursor.execute(
             f"SELECT Balance FROM cards WHERE CardNumber='{self.cardNo}'")
         senderBal = cursor.fetchone()[0]
         cursor.execute(
             f"SELECT Balance FROM cards WHERE CardNumber='{recv}'")
         receiverBal = cursor.fetchone()[0]
         amount = int(input("Enter how much money you want to transfer: "))
         if amount > senderBal:
             print('Value is greater than amount in your savings!')
         else:
             cursor.execute(
                 f"UPDATE cards SET balance={senderBal - amount} WHERE CardNumber='{self.cardNo}'"
             )
             connection.commit()
             cursor.execute(
                 f"UPDATE cards SET balance={receiverBal + amount} WHERE CardNumber='{recv}'"
             )
             connection.commit()
             print("Transfer successful!")
Code example #2
    def __init__(self, userID=None):
        self.FirstName = None
        self.LastName = None
        self.cardNo = None
        self.pin = None
        self.balance = 0

        if userID is not None:
            cursor, _ = utils.dbConnect()
            cursor.execute(
                f"SELECT CardNumber FROM cards WHERE rowid = {userID}")
            while True:
                row = cursor.fetchone()
                if row is None:
                    break
                self.cardNo = row[0]
            cursor.execute(
                f"SELECT FirstName,LastName,PIN,Balance FROM cards WHERE CardNumber = '{self.cardNo}';"
            )
            while True:
                row = cursor.fetchone()
                if row is None:
                    break
                self.FirstName = row[0]
                self.LastName = row[1]
                self.pin = row[2]
                self.balance = row[3]
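
A rowid or CardNumber identifies at most one row, so each while/fetchone loop above is really a single fetch. A hedged sketch of the same constructor body collapsed into one parameterized query (placeholder style again assumed from example #5):

        if userID is not None:
            cursor, _ = utils.dbConnect()
            cursor.execute(
                "SELECT CardNumber, FirstName, LastName, PIN, Balance "
                "FROM cards WHERE rowid = %s", (userID,))
            row = cursor.fetchone()
            if row is not None:
                (self.cardNo, self.FirstName, self.LastName,
                 self.pin, self.balance) = row
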
Code example #3
def get_cursor():

    # config = read_config(['/apps/common/environ.properties'])

    # if(config == None):
    #     return
    # # get the current branch (from local.properties)
    # env             = config.get('branch','env')
    #
    # # proceed to point everything at the 'branched' resources
    # metastore_dbName           = config.get(env+'.meta_db','dbName')
    # dbmeta_Url                 = config.get(env+'.meta_db','dbUrl')
    # dbmeta_User                = config.get(env+'.meta_db','dbUser')
    # dbmeta_Pwd                 = base64.b64decode(config.get(env+'.meta_db','dbPwd'))

    config_list = load_config()
    if ((config_list == None) or (len(config_list) == 0)):
        return
    try:
        # conn_metadata, cur    = dbConnect(metastore_dbName, dbmeta_User, dbmeta_Url, dbmeta_Pwd)
        conn_metadata, cur = dbConnect(
            config_list['meta_db_dbName'], config_list['meta_db_dbUser'],
            config_list['meta_db_dbUrl'],
            base64.b64decode(config_list['meta_db_dbPwd']))
    except Exception as e:
        err_msg = "Error connecting to database while fetching cursor"
        return
    return cur
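
get_cursor() returns None both when load_config() yields nothing and when the connection fails, and it discards conn_metadata, so the connection can never be closed explicitly. Callers therefore need a guard before using the cursor; a minimal usage sketch:

import sys

cur = get_cursor()
if cur is None:
    sys.exit("could not obtain a metadata cursor")
cur.execute("SELECT 1")
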
Code example #4
def login():
    cursor, _ = utils.dbConnect()
    username = input("Enter username: "******"Enter password: "******"select rowid,username,password from users"
    credentials = []
    cursor.execute(query)

    for x in cursor:
        credentials.append(x)

    loggedIn = False
    
    for i in range(len(credentials)):
        if username == credentials[i][1]:
            if password == credentials[i][2]:
                loggedIn = True
                print("Successfully logged in!")
                acc = account.reg(credentials[i][0])
            else:
                print("Invalid password")

    if loggedIn:
        customer.customerMenu(acc)
    else:
        print("No such username exists!")
            
Code example #5
def signUp():
    cursor, connection = utils.dbConnect()
    firstName = input("Enter your first name: ")
    lastName = input("Enter your last name: ")
    username = input("Enter username: "******"Enter password: "******"Enter address: ")
    panID = input("Enter PAN ID: ")
    aadhaarID = input("Enter Aadhaar ID: ")

    try:
        query = "INSERT INTO users (FirstName, LastName, Username, Password, Address, PAN, Aadhaar) VALUES (%s, %s, %s, %s, %s, %s, %s)"
        value = (firstName, lastName, username, password, address, panID, aadhaarID)
        cursor.execute(query, value)
        connection.commit()
        time.sleep(5)
        utils.clear()
    except mysql.connector.IntegrityError:
        print("Username is already in use!\n\n")
        connection.rollback()
        time.sleep(5)
        utils.clear()
    except:
        if len(panID) > 10:
            print("Invalid PAN ID!")
        elif len(aadhaarID) > 12:
            print("Invalid Aadhaar ID!")
        connection.rollback()
        utils.clear()
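
The bare except: above only inspects the PAN/Aadhaar lengths after the INSERT has already failed, and it silently swallows any other error. A hedged sketch that validates first, reusing the example's own length thresholds, so only the IntegrityError handler is needed around the INSERT:

    if len(panID) > 10:
        print("Invalid PAN ID!")
        return
    if len(aadhaarID) > 12:
        print("Invalid Aadhaar ID!")
        return
    # ...then run the parameterized INSERT exactly as above
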
Code example #6
    def createCard(self):
        firstName = input("Enter first name: ")
        lastName = input("Enter last name: ")
        pin = int(input("Enter 4 digit PIN: "))
        bal = float(input("Enter starting balance: (min 500)"))
        self.FirstName = firstName
        self.LastName = lastName
        self.pin = pin
        self.cardNo = '400000' + "".join(
            [str(random.randrange(10)) for i in range(9)])
        self.cardNo = self.luhnAlgo(self.cardNo, 1)
        self.balance = bal

        cursor, connection = utils.dbConnect()
        cursor.execute(
            f"INSERT into cards(CardNumber, FirstName, LastName, PIN, Balance) VALUES ({self.cardNo}, '{self.FirstName}', '{self.LastName}', {self.pin}, {self.balance})"
        )
        connection.commit()
        cursor.execute(
            f"UPDATE users SET CardNumber = '{self.cardNo}' WHERE FirstName = '{self.FirstName}' AND LastName = '{self.LastName}';"
        )
        connection.commit()
        print(
            f"Your card has been created! The details are as follows: \n\nAccount Number: {self.cardNo}\nFirst Name: {self.FirstName}\nLast Name: {self.LastName}\nPIN: {self.pin}\nBalance: ${self.balance}"
        )
        connection.close()
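
createCard() appends a Luhn check digit through luhnAlgo(self.cardNo, 1). That helper isn't shown anywhere in these examples; the standard computation it presumably performs looks roughly like this sketch:

def luhn_check_digit(partial):
    # Double every second digit from the right, sum the digits, and pick
    # the check digit that makes the total a multiple of 10.
    total = 0
    for i, ch in enumerate(reversed(partial)):
        d = int(ch)
        if i % 2 == 0:  # the rightmost digit of the partial number is doubled
            d = d * 2 - 9 if d > 4 else d * 2
        total += d
    return partial + str((10 - total % 10) % 10)

# e.g. luhn_check_digit('7992739871') -> '79927398713'
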
Code example #7
 def closeAccount(self):
     cursor, connection = utils.dbConnect()
     cursor.execute(f"DELETE from cards WHERE CardNumber='{self.cardNo}'")
     cursor.execute(
         f"DELETE from users WHERE FirstName='{self.FirstName}' and PIN={self.pin}"
     )
     connection.commit()
     print("Account has been closed!")
Code example #8
 def depositMoney(self):
     val = float(input("How much money would you like to deposit: "))
     cursor, connection = utils.dbConnect()
     cursor.execute(
         f"UPDATE cards SET Balance = Balance + {val} WHERE CardNumber = '{self.cardNo}'"
     )
     connection.commit()
     print(f"{val} was credited into your account.")
     connection.close()
Code example #9
 def getBalance(self):
     cursor, _ = utils.dbConnect()
     cursor.execute(
         f"SELECT Balance FROM cards WHERE CardNumber='{self.cardNo}'")
     balance = -1
     while True:
         row = cursor.fetchone()
         if row is None:
             break
         balance = row[0]
     return balance
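
As in example #2, the while/fetchone loop here retrieves a single row; a hedged one-fetch version that keeps the original's -1 sentinel for a missing card:

 def getBalance(self):
     cursor, _ = utils.dbConnect()
     cursor.execute("SELECT Balance FROM cards WHERE CardNumber = %s",
                    (self.cardNo,))
     row = cursor.fetchone()
     return row[0] if row is not None else -1
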
Code example #10
def checkMissing(config_list):
    print_hdr = "[datasync_quality_missing: checkMissing] - "
    print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "Entered")

    conn_metadata = None
    try:

        conn_metadata, cur_metadata = dbConnect(
            config_list['meta_db_dbName'], config_list['meta_db_dbUser'],
            config_list['meta_db_dbUrl'],
            base64.b64decode(config_list['meta_db_dbPwd']))

        check_sql = "select c.id, c.data_path, c.load_type,c.source_schemaname||'.'||c.source_tablename as source_table, c.target_schemaname||'.'||c.target_tablename as target_table, " \
                            "c.system_name, c.hvr_source_schemaname, to_char(l.last_success_run_time,'YYYY-MM-DD HH24:MI:SS') as last_success_run_time, " \
                            "to_char(q.last_count_run_time,'YYYY-MM-DD HH24:MI:SS') as last_count_run_time, to_char(c.last_run_time,'YYYY-MM-DD HH24:MI:SS') as last_control_run_time " \
                    "from sync.control_table c " \
                    "left outer join (select data_path,load_type, target_tablename,max(run_time) as last_count_run_time from sbdt.datasync_quality group by data_path,load_type,target_tablename) q " \
                    "on q.data_path = c.data_path " \
                            "AND q.load_type = c.load_type " \
                            "AND q.target_tablename = c.target_schemaname||'.'||c.target_tablename " \
                    "left outer join (select data_path, table_name, max(log_time) as last_success_run_time from sbdt.edl_log where plant_name = 'DATASYNC' and data_path in ('SRC2Hive','Talend2Hive','KFK2Hive','SQOOP2Hive') and status = 'Job Finished' group by data_path, table_name) l " \
                    "on l.data_path = c.data_path " \
                            "AND l.table_name = c.target_schemaname||'.'||c.target_tablename " \
                    "where 1 = 1 " \
                            "AND c.data_path in ('SRC2Hive','Talend2Hive','KFK2Hive','SQOOP2Hive') " \
                            "AND c.source_schemaname not in ('ftp') " \
                            "AND (c.system_name is null or c.system_name not in ('externaldata')) " \
                            "AND c.status_flag = 'Y' " \
                            "AND (c.custom_sql is NULL OR trim(c.custom_sql) = '') " \
                            "AND ((q.last_count_run_time is null) or (l.last_success_run_time is not null and q.last_count_run_time < l.last_success_run_time - interval '1 day')) " \
                    "order by last_success_run_time desc nulls last"

        print(check_sql)

        check_results = dbQuery(cur_metadata, check_sql)
        if len(check_results) > 0:
            mail_subject = "ATTENTION: Datasync Quality: Missing Count Validation"
            sendMailHTML(config_list['email_dataQualityReceivers'],
                         mail_subject, formatMissingMail(check_results))

    except Exception as e:
        mail_subject = "ERROR: Datasync Quality Missing"
        output_msg = "ERROR: Encountered error while running job" + "\n" + traceback.format_exc(
        )
        print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr +
              output_msg)
        sendMailHTML(config_list['email_dataQualityReceivers'], mail_subject,
                     output_msg)
        sys.exit(0)
    finally:
        if conn_metadata is not None and not conn_metadata.closed:
            conn_metadata.close()
Code example #11
    def get_waiting_job(self, lock_dlist):
        print_hdr = "[" + self.class_name + ": get_waiting_job] - "
        conn_metadata = None

        try:
            table_list = []
            results = []
            for lock_dict in lock_dlist:
                if lock_dict['table_name'].find('=') == -1:
                    table_list.append(lock_dict['table_name'])
            table_distinct_list = list(set(table_list))

            if table_distinct_list:
                table_filter_clause = ",".join("'" + l + "'" for l in table_distinct_list)

                conn_metadata, cur_metadata = dbConnect(self.config_list['meta_db_dbName'], self.config_list['meta_db_dbUser'],
                                                        self.config_list['meta_db_dbUrl'], base64.b64decode(self.config_list['meta_db_dbPwd']))

                log_sql = "select job_key,table_name,to_char(max_start_time,'YYYY-MM-DD HH24:MI:SS') as start_time \
from ( \
    select plant_name ||' : '|| data_path||' : '||job_name||' : '||load_id||' : '||run_id as job_key,table_name,status,log_time, \
        max(log_time) over (partition by table_name) as max_start_time \
    from sbdt.edl_log \
    where 1 = 1 \
        and log_time > (current_timestamp - INTERVAL '1 day') \
        and plant_name not in ('TRANSPORTATION') \
        and (upper(data_path) not like '%2GP' or upper(data_path) not like '%2RDS' or upper(data_path) not like '%2PREDIX') \
        and table_name in (" + table_filter_clause + ") \
        and table_name is not null and length(trim(table_name)) > 0 and table_name <> 'NA') T1 \
where 1 = 1 \
        and log_time = max_start_time \
        and upper(status) like '%START%'"

                print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "log_sql: " + log_sql)

                results = dbQuery(cur_metadata, log_sql)
                print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "results: ", results)

            for lock_dict in lock_dlist:
                if len(results) > 0:
                    for result in results:
                        if (result['table_name'] == lock_dict['table_name']) and (datetime.strptime(result['start_time'], '%Y-%m-%d %H:%M:%S') >= \
                                (datetime.strptime(lock_dict['lock_datetime'], '%Y-%m-%d %H:%M:%S') - timedelta(minutes=30))):
                            lock_dict['waiting_job'] = result['job_key']
                            lock_dict['waiting_job_start_time'] = result['start_time']

        except Exception as e:
            print (logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr + "ERROR details: " + traceback.format_exc())
        finally:
            if conn_metadata is not None and not conn_metadata.closed:
                conn_metadata.close()
            return lock_dlist
Code example #12
    def update_control(self, input_schema_name, input_table_name, update_code,
                       run_id):
        method_name = self.class_name + ": " + "update_control"
        print_hdr = "[" + method_name + ": " + self.data_path + ": " + str(
            self.load_id) + "] - "
        print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr +
              "Entered")

        conn_metadata = None
        cur_metadata = ''
        try:
            metastore_dbName = self.config_list['meta_db_dbName']
            dbmeta_Url = self.config_list['meta_db_dbUrl']
            dbmeta_User = self.config_list['meta_db_dbUser']
            dbmeta_Pwd = base64.b64decode(self.config_list['meta_db_dbPwd'])
            conn_metadata, cur_metadata = dbConnect(metastore_dbName,
                                                    dbmeta_User, dbmeta_Url,
                                                    dbmeta_Pwd)

            update_control_table_sql = "UPDATE sync.control_table \
                                SET load_status_cd = '" + update_code + "' \
                                WHERE target_schemaname = '" + input_schema_name + "'"
            if self.data_path in ['GP2HDFS', 'HDFS2MIR']:
                update_control_table_sql = update_control_table_sql \
                                            + " AND target_tablename in( '" + input_table_name + "','" + input_table_name + "_ext')" \
                                            + " AND data_path in ('GP2HDFS','HDFS2MIR')"
            else:
                update_control_table_sql = update_control_table_sql \
                                            + " AND target_tablename in( '" + input_table_name + "')" \
                                            + " AND data_path in ('" + self.data_path + "')"

            print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr +
                  "update_control_table_sql: " + update_control_table_sql)
            cur_metadata.execute(update_control_table_sql)

        except Exception as e:
            error = 1
            err_msg = method_name + "[{0}]: Error while updating status in control table".format(
                error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name,
                          (input_schema_name + '.' + input_table_name), status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
        finally:
            if conn_metadata is not None and not conn_metadata.closed:
                conn_metadata.close()
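
Note that update_control() executes the UPDATE but never commits conn_metadata, so unless dbConnect() turns on autocommit the change is discarded when the connection closes in the finally block. Assuming autocommit is off (the explicit conn_metadata.commit() in example #16 suggests it is), the hedged fix is one line after the execute:

            cur_metadata.execute(update_control_table_sql)
            conn_metadata.commit()  # assumed necessary; drop if dbConnect() autocommits
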
Code example #13
File: pull_stats.py Project: dwright04/psst_analysis
def main():
    """
    stat_files = get_files(PATH)
    
    ml_stats = extract_ml_stats(stat_files)
    ghost_stats = extract_ghosts(stat_files)
    movers_stats = extract_movers(stat_files)
    """
    conn = utils.dbConnect("psdb2", "dew", "", "ps13pi")
    if not conn:
        print "Cannot connect to the database"
        exit(1)
    output = open("psst_machine_learning_stats.csv","w")                                       
    output.write("#date,total,ml_rejected,threshold,ghost_total,ghost_movers,total_movers,promoted,atticpossible,recoveredmovers,seeing\n")

    eyeballing_stats = extract_eyeballing(conn, gen_dates(START_DATE))
    seeing_stats = extract_seeing(gen_dates(START_DATE))

    for date in gen_dates(START_DATE):
        total, ml_rejected, threshold = extract_ml_stats_sql(conn,date)
        ghosts_total, ghost_movers = extract_ghosts_sql(conn, date)
        total_movers = extract_movers_sql(conn, date)
        try:
            promoted = eyeballing_stats[date]["promoted"]
        except KeyError:
            promoted = 0
        try:
            possible_attic = eyeballing_stats[date]["atticpossible"]
        except KeyError:
            possible_attic = 0
        try:
            recovered_movers = eyeballing_stats[date]["movers"]
        except KeyError:
            recovered_movers = 0
        try:
            seeing = seeing_stats[date]["seeing"]
        except KeyError:
            seeing = 0

        output.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(date,total,ml_rejected,\
        threshold,ghosts_total,ghost_movers,total_movers,promoted,possible_attic,\
        recovered_movers,seeing))
    output.close()
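
The hand-rolled %-formatting of each CSV line can also be handed to the csv module, which owns delimiter and newline handling; a hedged sketch of the write path (per-date values computed exactly as in main() above):

import csv

with open("psst_machine_learning_stats.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["#date", "total", "ml_rejected", "threshold",
                     "ghost_total", "ghost_movers", "total_movers",
                     "promoted", "atticpossible", "recoveredmovers", "seeing"])
    for date in gen_dates(START_DATE):
        # ... gather the same per-date statistics as in main() ...
        writer.writerow([date, total, ml_rejected, threshold, ghosts_total,
                         ghost_movers, total_movers, promoted, possible_attic,
                         recovered_movers, seeing])
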
Code example #14
File: auditLog.py Project: jjayadeep06/Sync
def get_cursor():

    config = read_config(['/apps/common/environ.properties'])
    if(config == None):
        return
    # get the current branch (from local.properties)
    env             = config.get('branch','env')

    # proceed to point everything at the 'branched' resources
    metastore_dbName           = config.get(env+'.meta_db','dbName')
    dbmeta_Url                 = config.get(env+'.meta_db','dbUrl')
    dbmeta_User                = config.get(env+'.meta_db','dbUser')
    dbmeta_Pwd                 = base64.b64decode(config.get(env+'.meta_db','dbPwd'))

    try:
        conn_metadata, cur    = dbConnect(metastore_dbName, dbmeta_User, dbmeta_Url, dbmeta_Pwd)
    except Exception as e:
        err_msg = "Error connecting to database while fetching cursor"
        return
    return cur
Code example #15
def fn_call(fn_name, load_id=None, run_id=None):
    config = read_config(['/apps/common/environ.properties'])
    env = config.get('branch', 'env')
    metastore_dbName = config.get(env + '.meta_db', 'dbName')
    dbmeta_Url = config.get(env + '.meta_db', 'dbUrl')
    dbmeta_User = config.get(env + '.meta_db', 'dbUser')
    dbmeta_Pwd = base64.b64decode(config.get(env + '.meta_db', 'dbPwd'))

    dbtgt_Url = config.get(env + '.tgt_db_i360', 'dbUrl')
    dbtgt_User = config.get(env + '.tgt_db_i360', 'dbUser')
    dbtgt_dbName = config.get(env + '.tgt_db_i360', 'dbName')
    dbtgt_Pwd = base64.b64decode(config.get(env + '.tgt_db_i360', 'dbPwd'))

    # Making the Job Started entry
    try:
        conn_metadata, cur_metadata = dbConnect(metastore_dbName, dbmeta_User,
                                                dbmeta_Url, dbmeta_Pwd)
        # status = 'Job Started'
        plant_name = 'GE Transportation'
        system_name = 'RDS'
        job_name = 'RDS - Trigger DB Function'
        tablename = fn_name
        # audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, '', 0, 0, '')
    except Exception as e:
        output_msg = traceback.format_exc()
        error = 1
        err_msg = "Error: Unable to generate LOAD ID"
        print err_msg, output_msg
        # sendMail(emailSender, emailReceiver, err_msg, tablename, load_id, env, "ERROR","DataIKU Backup", '')
        return error, err_msg


# Generating load id if it was not supplied
    try:
        if load_id is None:
            load_id_sql = "select nextval('sbdt.edl_load_id_seq')"
            load_id_lists = dbQuery(cur_metadata, load_id_sql)
            load_id_list = load_id_lists[0]
            load_id = load_id_list['nextval']
    except Exception as e:
        output_msg = traceback.format_exc()
        error = 1
        status = 'Job Error'
        err_msg = "Error: connecting to logging database while making first audit entry"
        print err_msg, output_msg
        audit_logging(cur_metadata, 0, 0, plant_name, system_name, job_name,
                      tablename, status, '', 'Python', 0, 0, 0, 0, err_msg, 0,
                      0, output_msg)
        return error, err_msg

    try:
        if run_id is None:
            run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
            run_id_lists = dbQuery(cur_metadata, run_id_sql)
            run_id_list = run_id_lists[0]
            run_id = run_id_list['nextval']
    except Exception as e:
        error = 1
        err_msg = "Error: connecting to logging database while making second audit entry"
        print err_msg
        output_msg = traceback.format_exc()
        status = 'Job Error'
        audit_logging(cur_metadata, 0, 0, plant_name, system_name, job_name,
                      tablename, status, '', 'Python', 0, 0, 0, 0, err_msg, 0,
                      0, output_msg)
        return error, err_msg

    try:
        conn_target, cur_target = dbConnect(dbtgt_dbName, dbtgt_User,
                                            dbtgt_Url, dbtgt_Pwd)
    except Exception as e:
        error = 2
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while connecting to the Target  Database"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, '', 'Python', 0, 0, 0, 0,
                      err_msg, 0, 0, output_msg)
        return error, err_msg

    try:
        fn_name_list = fn_name.split(',')
        for fn_name in fn_name_list:
            status = 'Job Started'
            tablename = fn_name.split('(')[0]
            audit_logging(cur_metadata, load_id, run_id, plant_name,
                          system_name, job_name, tablename, status, '',
                          'Python', 0, 0, 0, 0, '', 0, 0, '')
            if fn_name.find("(") <> -1 and fn_name.find(")") <> -1:
                fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name)
                print "Running SQL : SELECT * FROM " + fn_name
            else:
                fn_result = dbQuery(cur_target,
                                    "SELECT * FROM " + fn_name + "()")
                print "Running SQL : SELECT * FROM " + fn_name + "()"
            print fn_result
            print fn_result[0][fn_name.split('(')[0].split('.')[1]]
            for notice in conn_target.notices:
                print notice
            if str(
                    fn_result[0]
                [fn_name.split('(')[0].split('.')[1]]) == 'False' or str(
                    fn_result[0][fn_name.split('(')[0].split('.')[1]]) == '1':
                print "Function returned False in the Target Database. Please check the function for more details"
                error = 4
                status = 'Job Error'
                output_msg = traceback.format_exc()
                err_msg = "Function returned False in the Target Database. Please check the function for more details"
                audit_logging(cur_metadata, load_id, run_id, plant_name,
                              system_name, job_name, tablename, status, '',
                              'Python', 0, 0, 0, 0, err_msg, 0, 0, output_msg)
                conn_metadata.close()
                conn_target.close()
                return error, err_msg
            else:
                status = 'Job Finished'
                error = 0
                err_msg = 'No Error'
                audit_logging(cur_metadata, load_id, run_id, plant_name,
                              system_name, job_name, tablename, status, '',
                              'Python', 0, 0, 0, 0, '', 0, 0, '')

    except Exception as e:
        error = 3
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while running the RDS Function"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, '', 'Python', 0, 0, 0, 0,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    # if str(fn_result[0][fn_name.split('(')[0].split('.')[1]]) == 'False' or str(fn_result[0][fn_name.split('(')[0].split('.')[1]]) == '1':
    #     print "Function returned False in the Target Database. Please check the function for more details"
    #     error = 4
    #     status = 'Job Error'
    #     output_msg = traceback.format_exc()
    #     err_msg = "Function returned False in the Target Database. Please check the function for more details"
    #     audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
    #     conn_metadata.close()
    #     conn_target.close()
    #     return error, err_msg

    # status = 'Job Finished'
    # error = 0
    # err_msg = 'No Error'
    # audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python', 0,0, 0, 0, '', 0, 0, '')
    conn_metadata.close()
    conn_target.close()
    return error, err_msg
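
The result check above repeats fn_result[0][fn_name.split('(')[0].split('.')[1]] three times; a hedged sketch that names the pieces once (same Python 2 context as the original):

            bare_name = fn_name.split('(')[0]      # e.g. 'schema.my_fn'
            result_key = bare_name.split('.')[1]   # result column named after the function
            fn_value = str(fn_result[0][result_key])
            if fn_value == 'False' or fn_value == '1':
                # ...error path unchanged from the original...
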
Code example #16
def count(schemaname, loadtype):

    config_list = load_config()
    metastore_dbName = config_list['meta_db_dbName']
    dbmeta_Url = config_list['meta_db_dbUrl']
    dbmeta_User = config_list['meta_db_dbUser']
    dbmeta_Pwd = base64.b64decode(config_list['meta_db_dbPwd'])

    dbtgt_host = config_list['src_db_hive_dbUrl']
    dbtgt_host2 = config_list['src_db_hive_dbUrl2']

    dbtgt_Port = config_list['src_db_hive_dataPort']
    dbtgt_Auth = config_list['src_db_hive_authMech']

    src_dbName = config_list['src_db_gp_dbName']
    dbsrc_Url = config_list['src_db_gp_dbUrl']
    dbsrc_User = config_list['src_db_gp_dbUser']
    dbsrc_Pwd = base64.b64decode(config_list['src_db_gp_dbPwd'])

    emailSender = config_list['email_sender']
    emailReceiver = config_list['email_receivers']

    t = datetime.fromtimestamp(time.time())
    v_timestamp = str(t.strftime('%Y-%m-%d %H:%M:%S'))

    input_source_schema = schemaname
    load_type = loadtype
    print input_source_schema

    # try:
    #     count = 0
    #     for pid in psutil.pids():
    #         p = psutil.Process(pid)
    #         if p.name() == "python2.7" and  p.cmdline()[2] == input_source_schema:
    #             print p.name(), p.cmdline()[1], p.cmdline()[2]
    #             count = count +1
    # except Exception as e:
    #     print e
    #     return
    # print count
    # if count > 0:
    #     err_msg = "Exiting Count program as Loads are running . . ."
    #     print err_msg
    #     load_id = "None"
    #     error_table_list = input_source_schema
    #     sendMail(emailSender,emailReceiver,err_msg,error_table_list,load_id)
    #     return
    # else:

    try:
        conn_metadata, cur_metadata = txn_dbConnect(metastore_dbName,
                                                    dbmeta_User, dbmeta_Url,
                                                    dbmeta_Pwd)
    except Exception as e:
        err_msg = "Error connecting to database while fetching  metadata"
        # Send Email
        print e
        return

    plant_name = "DATASYNC"
    system_name = "GPDB-Hive"
    job_name = "COUNT " + input_source_schema
    tablename = input_source_schema
    data_path = "GP2HDFS"
    technology = "Python"
    rows_inserted = 0
    rows_deleted = 0
    rows_updated = 0
    num_errors = 0
    count_sql_gpdb = ""
    count_sql_hive = ""

    load_id_sql = "select nextval('sbdt.edl_load_id_seq')"
    load_id_lists = dbQuery(cur_metadata, load_id_sql)
    load_id_list = load_id_lists[0]
    load_id = load_id_list['nextval']
    print "Load ID for this run is : ", load_id

    run_id_sql = "select nextval('sync.datasync_seq')"
    run_id_lists = dbQuery(cur_metadata, run_id_sql)
    run_id_list = run_id_lists[0]
    run_id = run_id_list['nextval']
    print "Run ID for this run is : ", run_id

    metadata_sql        = "SELECT source_schemaname||'.'||source_tablename||'-'||incremental_column as table_name "     \
                          "FROM sync.control_table  where data_path = 'GP2HDFS'  "                \
                          " and source_schemaname = '" + input_source_schema + "' AND load_type = '" + load_type + "'"
    print metadata_sql
    control = dbQuery(cur_metadata, metadata_sql)
    control_df = pd.DataFrame(control)
    control_df.columns = ['table_name']
    new_control = control_df['table_name'].tolist()

    status = 'Job Started'
    output_msg = ''
    err_msg = ''
    audit_logging(cur_metadata, load_id,run_id, plant_name, system_name, job_name, tablename,status, \
              data_path, technology,rows_inserted,rows_updated, rows_deleted, num_errors, err_msg ,0,0,output_msg)
    q = 0
    for j in new_control:
        table_name, incremental_col = j.split('-')
        if q < len(new_control) - 1:
            count_sql_gpdb += "SELECT " + str(
                run_id
            ) + " as run_id, COUNT(*) as COUNT,'" + table_name + "' as table_name, 'GPDB' as db_name , '" + v_timestamp + "' as end_date, max(" + incremental_col + "::timestamp without time zone) as max_incr_col FROM " + table_name + " WHERE " + incremental_col + " > '1900-01-01' AND " + incremental_col + " <= '" + v_timestamp + "' UNION ALL "
            count_sql_hive += "SELECT " + str(
                run_id
            ) + " as run_id, COUNT(*) as COUNT,'" + table_name + "' as table_name, 'Hive' as db_name , cast('" + v_timestamp + "' as timestamp) as end_date,max(hive_updated_date) as max_incr_col FROM " + table_name + " WHERE hive_updated_date > '1900-01-01' AND hive_updated_date <= '" + v_timestamp + "' UNION ALL "
            q = q + 1
        else:
            count_sql_gpdb += "SELECT " + str(
                run_id
            ) + " as run_id, COUNT(*) as COUNT,'" + table_name + "' as table_name , 'GPDB' as db_name , '" + v_timestamp + "' as end_date, max(" + incremental_col + "::timestamp without time zone) as max_incr_col FROM " + table_name + " WHERE " + incremental_col + " > '1900-01-01' AND " + incremental_col + " <= '" + v_timestamp + "'"
            count_sql_hive += "SELECT " + str(
                run_id
            ) + " as run_id, COUNT(*) as COUNT,'" + table_name + "' as table_name , 'Hive' as db_name, cast('" + v_timestamp + "' as timestamp) as end_date, max(hive_updated_date) as max_incr_col FROM " + table_name + " WHERE hive_updated_date > '1900-01-01' AND hive_updated_date <= '" + v_timestamp + "'"

    print "Running GPDB Count . . . . ."
    # print count_sql_gpdb

    try:
        conn_source, cur_source = dbConnect(src_dbName, dbsrc_User, dbsrc_Url,
                                            dbsrc_Pwd)
    except psycopg2.Error as e:
        err_msg = "Error connecting to source database"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        #continue
        return

    try:
        temp_table_sql = "CREATE TEMP TABLE count_" + input_source_schema + " AS " + count_sql_gpdb
        # print temp_table_sql
        cur_source.execute(temp_table_sql)
    except psycopg2.Error as e:
        print e
        err_msg = "Error while creating temp table in source"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.close()
        #continue
        return

    try:
        file = "/apps/staging/g00003/counts_" + input_source_schema + ".txt"
        gpdb_count_op_sql = "COPY count_" + input_source_schema + " TO STDOUT DELIMITER '|' NULL ''"
        pg_count_ip_sql = "COPY counts FROM STDIN DELIMITER '|' NULL ''"
        fo = open(file, 'w')
        cur_source.copy_expert(gpdb_count_op_sql, fo)
        fo.close()
        fi = open(file, 'r')
        cur_metadata.copy_expert(pg_count_ip_sql, fi)
        fi.close()
    except psycopg2.Error as e:
        err_msg = "Error while copying"
        print err_msg
        print e
        status = 'Job Error'
        output_msg = traceback.format_exc()
        conn_metadata.close()
        conn_source.close()
        #continue
        return
    conn_source.close()

    print "Running Hive Count. . . . . "

    try:
        conn_target, cur_target = dbConnectHive(dbtgt_host, dbtgt_Port,
                                                dbtgt_Auth)
    except Exception as e:
        try:
            conn_target, cur_target = dbConnectHive(dbtgt_host2, dbtgt_Port,
                                                    dbtgt_Auth)
        except Exception as e:
            err_msg = "Error while connecting to target database"
            status = 'Job Error'
            print e
            output_msg = e
            audit_logging(cur_metadata, load_id, run_id, plant_name,
                          system_name, job_name, tablename, status, data_path,
                          technology, rows_inserted, rows_updated,
                          rows_deleted, num_errors, err_msg, 0, 0, output_msg)
            conn_metadata.rollback()
            conn_metadata.close()
            conn_source.close()
            return

    count_view_sql = "CREATE OR REPLACE VIEW counts_" + input_source_schema + " AS " + count_sql_hive
    # print count_view_sql
    try:
        cur_target.execute(count_view_sql)
    except Exception as e:
        print e
        err_msg = "Error while creating  view"
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    count_query = "SELECT * FROM counts_" + input_source_schema

    try:
        cur_target.execute(count_query)
    except Exception as e:
        print e
        err_msg = "Error while executing count query"
        print err_msg
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    #results = {}
    #column = 0
    #for d in cur_target.description:
    #    results[d[0]] = column
    #    column = column + 1

    columnNames = [a['columnName'] for a in cur_target.getSchema()]
    # print columnNames
    try:
        count_df = pd.DataFrame(cur_target.fetchall(), columns=columnNames)
        file = "/apps/staging/g00003/counts_" + input_source_schema + ".txt"
        f1 = open(file, 'w')
        count_df.to_csv(path_or_buf=f1, sep='\t', header=False, index=False)
        f1.close()
    except Exception as e:
        print e
        err_msg = "Error while writing Data Frame into file"
        print err_msg
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    try:
        copy_sql = "COPY public.counts FROM STDIN WITH DELIMITER '\t'"
        fo = open(file)
        cur_metadata.copy_expert(copy_sql, fo)
        run_cmd([
            'rm', '-f',
            '/apps/staging/g00003/counts_' + input_source_schema + '.txt'
        ])
        err_msg = "Count completed successfully . . ."
        print err_msg
        error_table_list = input_source_schema
        conn_target.close()
    except Exception as e:
        print e
        err_msg = "Error while inserting data into final table"
        print err_msg
        status = 'Job Error'
        output_msg = traceback.format_exc()
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_metadata.rollback()
        conn_metadata.close()
        conn_source.close()
        conn_target.close()
        return

    # Final log entry
    try:
        error = 0
        err_msg = 'No Errors'
        status = 'Job Finished'
        output_msg = 'Job Finished successfully'
        print output_msg
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename,status, \
                      data_path, technology,rows_inserted,rows_updated, rows_deleted, num_errors, err_msg ,0,0,output_msg)
    except psycopg2.Error as e:
        error = 15
        err_msg = "Error while dropping external table in target"
        print err_msg
        status = 'Job Error'
        output_msg = traceback.format_exc()
        print output_msg
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name,
                      job_name, tablename, status, data_path, technology,
                      rows_inserted, rows_updated, rows_deleted, num_errors,
                      err_msg, 0, 0, output_msg)
        conn_target.rollback()
        conn_target.close()
        conn_metadata.close()
        return error, err_msg, tablename

    conn_metadata.commit()
    conn_metadata.close()
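
The q counter and the duplicated branches that build the two UNION ALL strings can be replaced by collecting the per-table SELECTs in lists and joining once; a hedged, behavior-equivalent sketch:

    gpdb_parts = []
    hive_parts = []
    for entry in new_control:
        table_name, incremental_col = entry.split('-')
        gpdb_parts.append(
            "SELECT " + str(run_id) + " as run_id, COUNT(*) as count, '"
            + table_name + "' as table_name, 'GPDB' as db_name, '"
            + v_timestamp + "' as end_date, max(" + incremental_col
            + "::timestamp without time zone) as max_incr_col FROM "
            + table_name + " WHERE " + incremental_col + " > '1900-01-01' AND "
            + incremental_col + " <= '" + v_timestamp + "'")
        hive_parts.append(
            "SELECT " + str(run_id) + " as run_id, COUNT(*) as count, '"
            + table_name + "' as table_name, 'Hive' as db_name, cast('"
            + v_timestamp + "' as timestamp) as end_date,"
            + " max(hive_updated_date) as max_incr_col FROM " + table_name
            + " WHERE hive_updated_date > '1900-01-01'"
            + " AND hive_updated_date <= '" + v_timestamp + "'")
    count_sql_gpdb = " UNION ALL ".join(gpdb_parts)
    count_sql_hive = " UNION ALL ".join(hive_parts)
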
Code example #17
                config_list['misc_multiprocessor_max'])

    # Special logic to mirror table from one schema in GP to a different schema in HIVE
    if len(sys.argv) > 7:
        arg = str(sys.argv[7])
        if arg == 'Y':
            is_special_logic = True

    print(print_hdr + "load_type: " + load_type + " with " +
          str(input_multiprocessor) + " multiprocessor")

    error = 0
    load_id = -1
    conn_metadata = None
    try:
        conn_metadata, cur_metadata = dbConnect(metastore_dbName, dbmeta_User,
                                                dbmeta_Url, dbmeta_Pwd)

        metadata_sql = "SELECT id, source_schemaname, source_tablename, target_schemaname, target_tablename, load_type, \
                incremental_column, last_run_time, second_last_run_time, join_columns, log_mode, data_path \
                FROM sync.control_table \
                WHERE data_path = '" + data_path + "' \
                    AND status_flag = 'Y'"

        if load_group_id is not None:
            metadata_sql = metadata_sql + " AND load_group_id = " + str(
                load_group_id)
        if data_path.find("MIR2") <> -1:
            metadata_sql = metadata_sql + " AND target_schemaname = '" + input_schema + "'"
        elif data_path.find("GP2HDFS") <> -1:
            metadata_sql = metadata_sql + " AND source_schemaname = '" + input_schema + "'" \
                           + " AND (hvr_last_processed_value > last_run_time OR last_run_time IS NULL)"
Code example #18
    def fs2hdfs_hive_log(self):
        hosts = []
        conn_metadata = None
        # Get information about the table to load
        try:
            metadata_sql = "SELECT * FROM sync.control_table \
                        WHERE target_tablename = 'hive_log_ext' \
                            AND target_schemaname = 'default'" + " \
                            AND data_path = " + "'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "before connecting to metastore controls")
            controls = dbQuery(cur_metadata, metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            err_msg = "Error connecting to control table database".format(
                error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            return output_msg
            sys.exit(error)
        finally:
            if conn_metadata is not None and not conn_metadata.closed:
                conn_metadata.close()

        if not controls:
            error = 3
            err_msg = "No Entry found in control table".format(error)
            status = 'Job Error'
            output_msg = "No Entry found in control table"
            return output_msg
            sys.exit(error)

        self.id = str(controls[0]['id'])
        self.source_schema = str(controls[0]['source_schemaname'])
        self.source_tablename = str(controls[0]['source_tablename'])
        self.target_schema = str(controls[0]['target_schemaname'])
        self.target_tablename = str(controls[0]['target_tablename'])
        partitioned = controls[0]['is_partitioned']
        self.load_type = str(controls[0]['load_type'])
        self.s3_backed = controls[0]['s3_backed']
        first_partitioned_column = str(controls[0]['first_partitioned_column'])
        second_partitioned_column = str(
            controls[0]['second_partitioned_column'])
        partitioned_column_transformation = str(
            controls[0]['partition_column_transformation'])
        custom_sql = str(controls[0]['custom_sql'])
        self.join_columns = str(controls[0]['join_columns'])
        self.archived_enabled = controls[0]['archived_enabled']
        distribution_columns = str(controls[0]['distribution_columns'])
        dist_col_transformation = str(controls[0]['dist_col_transformation'])
        self.log_mode = str(controls[0]['log_mode'])
        self.last_run_time = str(controls[0]['last_run_time'])

        incoming_path = self.paths + "/hiveserver2.log"
        local_inprogress_path = self.local_staging_path + "/in_progress/"
        inprogress_path = self.staging_path + self.target_schema + "/" + self.target_tablename + "/in_progress/"
        hosts = self.hive_hosts.split(',')
        print hosts
        # Creating the Local in_progress and/or clearing that location for new incoming files
        for host in hosts:
            print "Inside Host path check"
            path_to_check = self.local_staging_path + host
            print path_to_check
            path_check = glob.glob(path_to_check)
            print path_check
            if len(path_check) > 0:
                print "Path exists... Clearing the directory"
                (ret, out, err) = run_cmd(['rm', '-rf', (path_to_check)])
                print(ret, out, err)
                if ret:
                    error = 1
                    err_msg = "Error while cleaning in_progress location in Local FS".format(
                        error)
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                          err)
                    status = 'Job Error'
                    output_msg = traceback.format_exc()
                    print output_msg
                    sys.exit(error)
                    return output_msg

            (ret, out, err) = run_cmd(['mkdir', '-p', path_to_check])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg

        path_check = glob.glob(local_inprogress_path)
        if len(path_check) > 0:
            print "Path exists... Clearing the directory"
            (ret, out, err) = run_cmd(['rm', '-rf', (local_inprogress_path)])
            print(ret, out, err)
            if ret:
                error = 1
                err_msg = "Error while cleaning in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg
        (ret, out, err) = run_cmd(['mkdir', '-p', local_inprogress_path])
        if ret:
            error = 1
            err_msg = "Error while creating in_progress location in Local FS".format(
                error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            sys.exit(error)
            return output_msg

# Creating the HDFS in_progress location and/or clearing that location for new incoming files
        (ret, out,
         err) = run_cmd(["hadoop", "fs", "-test", "-e", inprogress_path])
        if ret:
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "Directory does not exist ... Creating...")
            (ret, out,
             err) = run_cmd(["hadoop", "fs", "-mkdir", "-p", inprogress_path])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in HDFS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg
        # else:
        #     (ret, out, err) = run_cmd(["hadoop", "fs", "-rm", "-r", inprogress_path + "*"])
        #     if ret:
        #         if err.find("No such file or directory") <> -1:
        #             pass
        #         else:
        #             error = 1
        #             err_msg = "Error while cleaning in_progress location in HDFS".format(error)
        #             print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        #             status = 'Job Error'
        #             output_msg = traceback.format_exc()
        #             print output_msg
        #             return output_msg

# Checking the last run time of the table.
# Bringing the files from each host since the last run time
        from datetime import date, timedelta
        if self.last_run_time == 'None':
            self.last_run_time = str(datetime.now())
        print "Last Run Time : ", self.last_run_time
        lr_dt, lr_ts = self.last_run_time.split()
        lr_dt = datetime.strptime(lr_dt, "%Y-%m-%d").date()
        today = datetime.now().date()
        delta = today - lr_dt
        # hosts = self.hive_hosts.split(',')
        print hosts
        for host in hosts:
            (ret, out, err) = run_cmd([
                'scp', ('hdp@' + host + ':' + incoming_path),
                (self.local_staging_path + host + "/")
            ])
            print ret, out, err
            if ret > 0:
                error = 1
                err_msg = "Error while moving Current Log File to Local in_progress location".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print err_msg, output_msg
                sys.exit(error)
                return output_msg
            for i in range(delta.days):
                dt = (lr_dt + timedelta(days=i))
                dtstr = dt.isoformat()
                print dtstr
                (ret, out, err) = run_cmd([
                    'scp',
                    ('hdp@' + host + ':' + incoming_path + '.' + dtstr + '*'),
                    (self.local_staging_path + host + "/")
                ])
                print ret, out, err
                if ret > 0:
                    if err.find('No such file or directory') <> -1:
                        pass
                    else:
                        error = 1
                        err_msg = "Error while moving data to in_progress location".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print output_msg
                        sys.exit(error)
                        return output_msg

# Unzipping the files if there are any zipped files
        for host in hosts:
            files = glob.glob((self.local_staging_path + host + "/*"))
            for file in files:
                if file.find(".gz") <> -1:
                    try:
                        with gzip.open(file, 'rb') as f_in:
                            with open((file.replace('.gz', '_') + host),
                                      'wb') as f_out:
                                shutil.copyfileobj(f_in, f_out)
                    except Exception as e:
                        error = 4
                        err_msg = "Error while unzipping file in Local FS"
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg
                    #(ret,out,err)       = run_cmd(['gunzip', '-c', file, ' > ','test')])
                    # (ret, out, err) = run_cmd(['gunzip', file])
                    #(ret, out, err) = run_cmd(['zcat',  file, '>', (file.replace('.gz', '_') + host)])
                    # if ret > 0:
                    #     error = 1
                    #     err_msg = "Error while unzipping file in Local FS".format(error)
                    #     print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    #     status = 'Job Error'
                    #     output_msg = traceback.format_exc()
                    #     print err_msg, output_msg
                    #     return output_msg
                    (ret, out, err) = run_cmd(['rm', '-f', file])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while removing zipped file in Local FS".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg
                else:
                    (ret, out,
                     err) = run_cmd(['mv', file, (file + '_' + host)])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while renaming file in Local FS".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg

# Moving the final set of files to the in_progress location to send it to HDFS
            move_files((self.local_staging_path + host + "/*"),
                       local_inprogress_path)
            if ret > 0:
                error = 1
                err_msg = "Error while moving files to in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print err_msg, output_msg
                sys.exit(error)
                return output_msg


# Ingesting to HDFS

        (ret, out, err) = run_cmd([
            'hadoop', 'distcp', '-overwrite',
            'file:///' + (local_inprogress_path + "/*"),
            'hdfs:///' + inprogress_path
        ])
        if ret > 0:
            error = 1
            err_msg = "Error while moving files to HDFS from Local in_progress path".format(
                error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print err_msg, output_msg
            sys.exit(error)
            return output_msg

        try:
            metadata_sql = "UPDATE sync.control_table SET last_run_time = now() \
                        WHERE target_tablename = 'hive_log' \
                            AND target_schemaname = 'default'" + " \
                            AND data_path = " + "'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            cur_metadata.execute(metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            err_msg = "Error connecting to control table database".format(
                error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            sys.exit(error)
            return output_msg
        finally:
            conn_metadata.close()
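
Nearly every run_cmd call in fs2hdfs_hive_log() is followed by the same nine-line log-and-exit block; a hedged helper that centralizes it (run_or_exit and its names are illustrative, not part of the project):

def run_or_exit(cmd, err_msg, error=1):
    # Wrap run_cmd so each call site stops repeating the error block.
    (ret, out, err) = run_cmd(cmd)
    if ret:
        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        print(err_msg)
        sys.exit(error)
    return out

# e.g. run_or_exit(['mkdir', '-p', local_inprogress_path],
#                  "Error while creating in_progress location in Local FS")
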
Code example #19
File: file2hdfs.py Project: mayukhghoshme/myProject
    def fs2hdfs(self):
        self.technology = 'Python'
        self.system_name = 'HDFS'
        self.job_name = 'FS-->HDFS'
        t = datetime.fromtimestamp(time.time())
        v_timestamp = str(t.strftime('%Y-%m-%d %H:%M:%S'))
        tablename = self.target_schemaname + "." + self.target_tablename

        try:
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
        except psycopg2.Error as e:
            error = 1
            err_msg = "Error {0} connecting to control table database".format(error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            return error, err_msg, tablename

        try:
            run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
            run_id_lists = dbQuery(cur_metadata, run_id_sql)
            run_id_list = run_id_lists[0]
            run_id = run_id_list['nextval']
            print "Run ID for the table", tablename, " is : ", run_id
        except Exception as e:
            print(e)
            error = 2
            err_msg = "Error while getting Run ID"
            status = "Job Error"
            output_msg = traceback.format_exc()
            audit_logging(cur_metadata, self.load_id, 0, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename  # run_id is not defined past this point
        status = 'Job Started'
        error = 0
        err_msg = ''
        output_msg = ''
        audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                      self.system_name, self.job_name, tablename, status,
                      self.data_path, self.technology, 0, 0, 0, error, err_msg,
                      0, 0, output_msg)

        if len(self.source_schemaname) > 0 and len(self.source_tablename) > 0:
            local_file_name = self.source_schemaname + self.source_tablename
        elif len(self.source_schemaname) > 0 and len(
                self.source_tablename) == 0:
            local_file_name = self.source_schemaname
        elif len(self.source_schemaname) == 0 and len(
                self.source_tablename) > 0:
            local_file_name = self.source_tablename
        else:
            error = 2
            err_msg = "No source to run this program"
            output_msg = "No source to run this program"
            status = 'Job Error'
            print(err_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename
        print(local_file_name)
        try:
            files = glob.glob(local_file_name)
            if len(files) == 0:
                error = 3
                err_msg = "No data found"
                output_msg = "No data found"
                print(err_msg)
                return error, err_msg, tablename
            else:
                self.target_path = self.hive_warehouse_path + "/" + self.target_schemaname + ".db/" + self.target_tablename + "/"
                (ret, out, err) = run_cmd(
                    ['hadoop', 'fs', '-rm', '-r', (self.target_path + "*")])
                if ret:
                    if err.find("No such file or directory") != -1:
                        (ret, out, err) = run_cmd(
                            ['hadoop', 'fs', '-mkdir', self.target_path])
                        if ret:
                            pass  # ignore mkdir failure (directory may already exist)
                    else:
                        error = 4
                        err_msg = "Error in cleaning in target path"
                        output_msg = traceback.format_exc()
                        return error, err_msg, tablename
        except Exception as e:
            error = 5
            err_msg = "Error while checking the local file path or cleaning the target location in HDFS"
            output_msg = traceback.format_exc()
            status = 'Job Error'
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        try:
            files = glob.glob(local_file_name)
            for f in files:
                (ret, out, err) = run_cmd(
                    ['hadoop', 'fs', '-copyFromLocal', f, self.target_path])
                if ret > 0:
                    error = 5
                    err_msg = "Error in ingesting into HDFS"
                    output_msg = traceback.format_exc()
                    return error, err_msg, tablename
        except Exception as e:
            error = 6
            err_msg = "Error while loading data into HDFS"
            output_msg = traceback.format_exc()
            status = 'Job Error'
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        try:
            update_control_info_sql = "UPDATE sync.control_table set last_run_time = '" + v_timestamp + "' where id = " + str(
                self.id
            ) + " AND target_schemaname = '" + self.target_schemaname + "' AND target_tablename = '" + self.target_tablename + "' AND data_path = '" + self.data_path + "'"
            print update_control_info_sql
            cur_metadata.execute(update_control_info_sql)
        except psycopg2.Error as e:
            print(e)
            error = 7
            err_msg = "Error while updating the control table"
            print(err_msg)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename

        # Final log entry
        try:
            error = 0
            err_msg = 'No Errors'
            status = 'Job Finished'
            output_msg = 'Job Finished successfully'
            print(output_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)

        except psycopg2.Error as e:
            error = 15
            err_msg = "Error while writing the final audit entry"
            print(err_msg)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print(output_msg)
            audit_logging(cur_metadata, self.load_id, run_id, self.plant_name,
                          self.system_name, self.job_name, tablename, status,
                          self.data_path, self.technology, 0, 0, 0, error,
                          err_msg, 0, 0, output_msg)
            return error, err_msg, tablename
        finally:
            conn_metadata.close()
        return error, err_msg, tablename
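
fs2hdfs leans on two database helpers that do not appear in the example: dbConnect, which returns a (connection, cursor) pair, and dbQuery, which returns rows addressable by column name (run_id_list['nextval']). A minimal sketch of what they might look like, assuming a PostgreSQL metastore accessed through psycopg2; the signatures are inferred from the call sites above, not taken from the project:

import psycopg2
import psycopg2.extras

def dbConnect(db_name, db_user, db_url, db_pwd):
    # Assumed helper: open a PostgreSQL connection and hand back
    # (connection, dict-style cursor), as the examples expect.
    conn = psycopg2.connect(dbname=db_name, user=db_user,
                            host=db_url, password=db_pwd)
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    return conn, cur

def dbQuery(cur, sql):
    # Assumed helper: execute a query and return all rows as dicts,
    # so callers can write rows[0]['nextval'].
    cur.execute(sql)
    return cur.fetchall()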
Code Example #20
0
def fn_call(load_id, fn_name):
    config              = read_config(['/apps/common/environ.properties'])
    env                 = config.get('branch', 'env')
    metastore_dbName    = config.get(env + '.meta_db', 'dbName')
    dbmeta_Url          = config.get(env + '.meta_db', 'dbUrl')
    dbmeta_User         = config.get(env + '.meta_db', 'dbUser')
    dbmeta_Pwd          = base64.b64decode(config.get(env + '.meta_db', 'dbPwd'))

    dbtgt_Url_predix_wto    = config.get(env + '.tgt_db_predix_wto', 'dbUrl')
    dbtgt_User_predix_wto   = config.get(env + '.tgt_db_predix_wto', 'dbUser')
    dbtgt_dbName_predix_wto = config.get(env + '.tgt_db_predix_wto', 'dbName')
    dbtgt_Pwd_predix_wto    = base64.b64decode(config.get(env + '.tgt_db_predix_wto', 'dbPwd'))
    dbtgt_Port_predix_wto   = config.get(env + '.tgt_db_predix_wto', 'dbPort')

    try:
        conn_metadata, cur_metadata = dbConnect(metastore_dbName, dbmeta_User, dbmeta_Url, dbmeta_Pwd)
        run_id_sql = "select nextval('sbdt.edl_run_id_seq')"
        run_id_lists = dbQuery(cur_metadata, run_id_sql)
        run_id_list = run_id_lists[0]
        run_id = run_id_list['nextval']
        status = 'Job Started'
        plant_name = 'GE Transportation'
        system_name = 'WTO Predix'
        job_name = 'WTO Predix - Trigger DB Function'
        tablename = 'WTO Predix'
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename,status, '', 'Python', 0, 0, 0, 0, '',0, 0, '')
    except Exception as e:
        error = 1
        err_msg = "Error: connecting to logging database while making first audit entry"
        print(err_msg)
        # sendMail(emailSender, emailReceiver, err_msg, tablename, load_id, env, "ERROR","DataIKU Backup", '')
        return error, err_msg

    try:
        conn_target, cur_target = txn_dbConnect(dbtgt_dbName_predix_wto, dbtgt_User_predix_wto,
                                                dbtgt_Url_predix_wto, dbtgt_Pwd_predix_wto,
                                                dbtgt_Port_predix_wto)

    except Exception as e:
        error = 2
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while connecting to the Target Predix Database"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
        return error, err_msg

    try:
        if fn_name.find("(") <> -1 and fn_name.find(")") <> -1:
            fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name)
        else:
            fn_result = dbQuery(cur_target, "SELECT * FROM " + fn_name + "()")
        # print fn_result
        # print fn_result[0]['proc_wto_wheel_data']
    except Exception as e:
        error = 3
        status = 'Job Error'
        output_msg = traceback.format_exc()
        err_msg = "Error while running the Predix Function"
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python',0, 0, 0, 0, err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    if str(fn_result[0]['proc_wto_wheel_data']) == 'False' or str(fn_result[0]['proc_wto_wheel_data']) == '1':
        print("Function returned False in the Predix Database. Please check the function for more details")
        error = 4
        status = 'Job Error'
        err_msg = "Function returned False in the Predix Database. Please check the function for more details"
        output_msg = err_msg  # no exception in flight here, so traceback.format_exc() would be empty
        audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python', 0, 0, 0, 0, err_msg, 0, 0, output_msg)
        conn_metadata.close()
        conn_target.close()
        return error, err_msg

    status = 'Job Finished'
    err_msg = ''
    audit_logging(cur_metadata, load_id, run_id, plant_name, system_name, job_name, tablename, status, '', 'Python', 0, 0, 0, 0, '', 0, 0, '')
    conn_metadata.close()
    conn_target.close()
    return 0, err_msg
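
A hypothetical invocation of fn_call, for illustration only; the load ID and function name below are invented values, and the example relies on the success path returning a (0, '') tuple as above:

if __name__ == '__main__':
    error, err_msg = fn_call(12345, 'proc_wto_wheel_data')
    if error:
        print("fn_call failed with code %d: %s" % (error, err_msg))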