def log_load_start(self):
     upd_sql = '''
     update dw.log_stock_transaction
     set load_start_time = '{0}'
     where row_id = {1} and stock_id = '{2}' and biz_date = '{3}'
     '''.format(time.ctime(), self.log_row_id, self.stock_id, self.date)
     get_query_result(self.conn, upd_sql)
     self.conn.commit()
def delete_existing_records(self):
     # Note: strptime() returns a datetime, so the literal rendered into the SQL
     # is of the form 'YYYY-MM-DD 00:00:00' rather than the raw '%Y%m%d' string.
     del_sql = """
     delete from dw.stock_transaction where stock_id = '{0}' and biz_date = '{1}'
     """.format(
         self.stock_id, datetime.datetime.strptime(self.date, "%Y%m%d")
     )
     get_query_result(self.conn, del_sql)
     print_log("Deletion for {0} {1} completed successfully.".format(self.stock_id, self.date))
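All of these snippets go through a project-local get_query_result(conn, sql) helper that is not shown here; a minimal sketch of such a helper on top of a DB-API (psycopg2-style) connection might look like the following (the exact behavior is an assumption, not part of the original code):

def get_query_result(conn, sql):
    # Hypothetical helper (assumed, not from the source): run a statement and
    # return any result set as a list of dicts keyed by column name.
    cur = conn.cursor()
    cur.execute(sql)
    if cur.description is None:   # DML such as insert/update/delete has no result set
        return []
    columns = [col[0] for col in cur.description]
    return [dict(zip(columns, row)) for row in cur.fetchall()]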
Example #4
def execute(self):
     p = subprocess.Popen(self.cmd, shell=True)
     return_code = p.wait()

     # Record the outcome of the command in the job audit table.
     is_success = 'Y' if return_code == 0 else 'N'
     upd_sql = "update dw.job set end_time = '{0}', is_success = '{1}' where row_id = {2}".format(
         time.ctime(), is_success, self.row_id)
     get_query_result(self.conn, upd_sql)
def log_load_end(self, is_success=True):
     upd_sql = '''
     update dw.log_stock_transaction
     set load_end_time = '{0}', is_load_success = '{1}'
     where row_id = {2} and stock_id = '{3}' and biz_date = '{4}'
     '''.format(time.ctime(), 'Y' if is_success else 'N', self.log_row_id,
                self.stock_id, self.date)
     get_query_result(self.conn, upd_sql)
     self.conn.commit()
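log_load_start and log_load_end are meant to bracket one load attempt for a stock/date; a hypothetical caller inside the same loader class could wire them up like this (the load_one_day name is illustrative):

self.log_load_start()                  # stamp load_start_time for this row_id
try:
    self.delete_existing_records()     # make the load idempotent for the day
    self.load_one_day()                # hypothetical method doing the actual copy/insert
    self.log_load_end(is_success=True)
except Exception:
    self.log_load_end(is_success=False)
    raise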
Example #8
def pre_execute(self):
     self.set_command()
     if self.cmd is not None and len(self.cmd) > 0:
         #-- get row_id
         self.row_id = get_query_result(
             self.conn, "select nextval('dw.seq_job_row_id') as row_id")[0]["row_id"]
         ins_sql = "insert into dw.job(row_id, name, start_time) values({0}, '{1}', '{2}')".format(
             self.row_id, self.name, time.ctime())
         get_query_result(self.conn, ins_sql)
         self.conn.commit()
     else:
         raise RuntimeError('No command needs to be executed.')
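pre_execute and execute together form a small run-and-audit wrapper around a shell command; a hypothetical call site (the Job class name and its constructor are assumptions) might be:

job = Job(conn, name='load_stock_transaction')   # hypothetical wrapper class
job.pre_execute()   # fetch row_id from dw.seq_job_row_id and insert the start record
job.execute()       # run self.cmd via subprocess, then stamp end_time / is_success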
def check_row_id_existance(self):
     sel_sql = '''
     select count(*) as count from dw.log_stock_transaction where row_id = {0} and stock_id = '{1}' and biz_date = '{2}'
     '''.format(self.log_row_id, self.stock_id, self.date)
     row_count = get_query_result(self.conn, sel_sql)[0]['count']
     if row_count == 0:
         raise RuntimeError('Row id {0} is not found for {1}:{2}'.format(
             self.log_row_id, self.stock_id, self.date))
Example #11
def check_job_status(conn, name):
    query_status_for_last_run = '''
    select t.is_success
    from (
        select is_success, 
            row_number() over(partition by name order by start_time desc) rk 
        from dw.job 
        where name = '{0}' and date(start_time) = '{1}' 
    ) t where t.rk = 1
    '''.format(name, date.today())
    rows = get_query_result(conn, query_status_for_last_run)
    if len(rows) == 0:
        status = 'N'
    else:
        status = rows[0]['is_success']
    return status if status is not None else 'N'
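check_job_status only looks at the most recent run of the named job for today; a typical call site gates a dependent job on it, for example (the job name is illustrative):

if check_job_status(conn, 'download_stock_transaction') != 'Y':
    raise RuntimeError('Upstream job has not succeeded today, aborting.')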
def loader(queue,
           conn,
           start_date=options.start_date,
           end_date=options.end_date,
           stock_id=options.stock_id,
           merge_before_copy=options.merge_before_copy,
           enable_copy=options.enable_copy):

    cur_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')

    stock_list_sql = '''
    select row_id, biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date = '{biz_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''
    if stock_id is not None:
        stock_list_sql = stock_list_sql + " and stock_id = '" + stock_id + "'"

    cur = get_cur(conn)
    while cur_date_dt <= end_date_dt:
        if merge_before_copy:
            # Loading the files into the table one by one takes too long, so instead we
            # merge all of the piece files into a single file and bulk-load that merged
            # file; this finishes in under 5 minutes.
            cur_date_str = cur_date_dt.strftime('%Y%m%d')
            working_dir = data_dir + SEP + cur_date_str
            file_merged = os.path.join(working_dir, "file_merged.csv")
            if os.path.exists(file_merged):
                warn_log('Removing old file: ' + file_merged)
                os.remove(file_merged)
            #-- Starting to merge files
            with open(file_merged, "a") as dest:
                i = 0
                for _, _, filenames in os.walk(working_dir):
                    for filename in fnmatch.filter(filenames, "[0-9]*.txt"):
                        with open(os.path.join(working_dir, filename)) as src:
                            shutil.copyfileobj(src, dest)
                        i += 1
                        print_log('Merged ' + str(i) + ' files.')
            #-- Deleting records from db
            del_sql = '''delete from dw.stock_transaction where biz_date = '{}' '''.format(
                cur_date_str)
            get_query_result(conn, del_sql)
            conn.commit()
            print_log(
                'Deletion for biz_date {} completed successfully.'.format(
                    cur_date_str))
            #-- Updating is_load_success to N in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'N' where biz_date = '{}' and is_download_success = 'Y' '''.format(
                cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to N')

            #++++++++ Starting to load the merged file into table
            psql_copy_from(DB_HOST,
                           DB_NAME,
                           DB_UNAME,
                           'dw.stock_transaction',
                           file_merged,
                           DB_PORT,
                           args=' with (encoding \'GBK\')')
            print_log('Successfully loaded {} into table.'.format(file_merged))

            #-- Updating is_load_success to Y in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'Y' where biz_date = '{}' and is_download_success = 'Y' '''.format(
                cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to Y')

            #-- Cleaning up working dir
            os.remove(file_merged)

            cur_date_dt = cur_date_dt + datetime.timedelta(1)

        else:
            stock_list_sql_var_replaced = stock_list_sql.format(
                biz_date=cur_date_dt)
            cur.execute(stock_list_sql_var_replaced)
            rows = list(cur)
            for row in rows:
                row_id = row['row_id']
                biz_date = str(row['biz_date']).replace('-', '')
                stock_id = row['stock_id']
                while queue.full():
                    print_log(
                        '=================> queue is full, wait for 1 second...'
                    )
                    time.sleep(1)
                s = Stock_trans_loader(queue,
                                       conn,
                                       row_id,
                                       stock_id,
                                       biz_date,
                                       enable_copy=enable_copy)
                s.start()
                print_log('-----> queue size: ' + str(queue.qsize()))
                conn.commit()

            cur_date_dt = cur_date_dt + datetime.timedelta(1)

    while not queue.empty():
        print_log(
            '=================> queue is not empty yet, wait for 1 second...')
        time.sleep(1)
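loader expects a bounded queue shared with the Stock_trans_loader workers it starts; a hypothetical entry point (the queue size and the get_conn helper are assumptions) might look like:

if __name__ == '__main__':
    from queue import Queue

    q = Queue(maxsize=16)   # bounded, so that queue.full() actually throttles the workers
    conn = get_conn()       # hypothetical project helper returning a DB connection
    loader(q, conn)         # dates / stock_id default to the parsed command-line options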