import datetime
import fnmatch
import os
import shutil
import time
import traceback

# print_log, warn_log, psql_copy_from, inserter, get_cur, get_query_result,
# options, data_dir, SEP and the DB_* / TABLE / COLS constants are defined
# elsewhere in this module.


def run(self):
    self.check_row_id_existance()
    # Claim a slot in the bounded queue; put() blocks while the queue is full.
    self.queue.put(self.getName())
    self.log_load_start()
    self.delete_existing_records()
    try:
        if self.enable_copy:
            print_log('psql copy...')
            psql_copy_from(DB_HOST,
                           DB_NAME,
                           DB_UNAME,
                           'dw.stock_transaction',
                           self.file,
                           DB_PORT,
                           args=' with (encoding \'GBK\')')
        else:
            print_log('psql insert...')
            inserter(self.conn, TABLE, COLS, 'file', self.file, '\t')
        self.log_load_end(is_success=True)
        print_log(
            'Loading {stock_id} for {date} completed successfully.'.format(
                stock_id=self.stock_id, date=self.date))
    except Exception:
        traceback.print_exc()
        self.log_load_end(is_success=False)
        raise RuntimeError('Loading {stock_id} for {date} failed.'.format(
            stock_id=self.stock_id, date=self.date))
    finally:
        # Release the queue slot so the next loader thread can proceed.
        self.queue.get()
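# The COPY branch above delegates to psql_copy_from, a helper defined
# elsewhere in this repo. As a point of reference, here is a minimal sketch
# of what such a helper could look like, assuming it shells out to the psql
# client and issues a client-side \copy. The parameter list mirrors the call
# site above; the body is an illustrative assumption, not the project's
# actual implementation.
import subprocess


def psql_copy_from_sketch(host, dbname, uname, table, path, port, args=''):
    # \copy streams the local file through the psql client, so the file only
    # needs to be readable on the machine running the loader, not on the
    # database server itself.
    copy_cmd = '\\copy {table} from \'{path}\'{args}'.format(
        table=table, path=path, args=args)
    subprocess.check_call([
        'psql', '-h', host, '-p', str(port), '-U', uname, '-d', dbname,
        '-c', copy_cmd,
    ])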
def loader(queue,
           conn,
           start_date=options.start_date,
           end_date=options.end_date,
           stock_id=options.stock_id,
           merge_before_copy=options.merge_before_copy,
           enable_copy=options.enable_copy):

    cur_date_dt = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_dt = datetime.datetime.strptime(end_date, '%Y%m%d')
    
    stock_list_sql = '''
    select row_id, biz_date, stock_id
    from dw.log_stock_transaction
    where biz_date = '{biz_date}'
    and is_download_success = 'Y'
    and (is_load_success = 'N' or is_load_success is null)
    '''
    if stock_id is not None:
        stock_list_sql = stock_list_sql + ' and stock_id = \'' + stock_id + '\''
    
    cur = get_cur(conn)
    while cur_date_dt <= end_date_dt:  
        if merge_before_copy:
            # Loading the files one by one takes far too long; merging all
            # the per-stock pieces into a single file and COPYing that file
            # into the table completes in under 5 minutes.
            cur_date_str = cur_date_dt.strftime('%Y%m%d')
            working_dir = data_dir + SEP + cur_date_str
            file_merged = os.path.join(working_dir, "file_merged.csv")
            if os.path.exists(file_merged):
                warn_log('Removing old file: ' + file_merged)
                os.remove(file_merged)
            #-- Merging all per-stock files for the day into one CSV
            with open(file_merged, 'w') as dest:
                i = 0
                for dirpath, _, filenames in os.walk(working_dir):
                    for filename in fnmatch.filter(filenames, '[0-9]*.txt'):
                        # Join against dirpath so files found in
                        # subdirectories resolve correctly too.
                        with open(os.path.join(dirpath, filename)) as src:
                            shutil.copyfileobj(src, dest)
                        i += 1
            print_log('Merged ' + str(i) + ' files.')
            #-- Deleting records from db
            del_sql = '''delete from dw.stock_transaction where biz_date = '{}' '''.format(cur_date_str)
            get_query_result(conn, del_sql)
            conn.commit()
            print_log('Deletion for biz_date {} completed successfully.'.format(cur_date_str))
            #-- Updating is_load_success to N in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'N' where biz_date = '{}' and is_download_success = 'Y' '''.format(cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to N')
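            # With the flag at N, a crash during the COPY below leaves these
            # rows marked as not loaded, so a rerun will pick them up again.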

            #-- Loading the merged file into the table
            psql_copy_from(DB_HOST,
                           DB_NAME,
                           DB_UNAME,
                           'dw.stock_transaction',
                           file_merged,
                           DB_PORT,
                           args=' with (encoding \'GBK\')')
            print_log('Successfully loaded {} into table.'.format(file_merged))
            
            #-- Updating is_load_success to Y in log table
            upd_sql = '''update dw.log_stock_transaction set is_load_success = 'Y' where biz_date = '{}' and is_download_success = 'Y' '''.format(cur_date_str)
            get_query_result(conn, upd_sql)
            conn.commit()
            print_log('is_load_success is updated to Y')

            #-- Cleaning up working dir
            os.remove(file_merged)
            
            cur_date_dt = cur_date_dt + datetime.timedelta(1)
            
        else:
            stock_list_sql_var_replaced = stock_list_sql.format(
                biz_date=cur_date_dt.strftime('%Y-%m-%d'))
            cur.execute(stock_list_sql_var_replaced)
            rows = list(cur)
            for row in rows:
                row_id = row['row_id']
                biz_date = str(row['biz_date']).replace('-','')
                stock_id = row['stock_id']
                while queue.full():
                    print_log('=================> queue is full, wait for 1 second...')
                    time.sleep(1)
                s = Stock_trans_loader(queue, conn, row_id, stock_id,
                                       biz_date, enable_copy=enable_copy)
                s.start()
                print_log('-----> queue size: ' + str(queue.qsize()))
                conn.commit()
                    
            cur_date_dt = cur_date_dt + datetime.timedelta(1)

    while not queue.empty():
        print_log('=================> queue is not empty yet, wait for 1 second...')
        time.sleep(1)
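# A hypothetical driver for loader(), assuming a psycopg2 connection and a
# bounded standard-library queue. Only the DB_* constant names come from the
# module above; the queue bound and connection wiring are assumptions, not
# the project's actual entry point.
import queue

import psycopg2

if __name__ == '__main__':
    # A bounded queue caps how many Stock_trans_loader threads run at once:
    # run() claims a slot with put() and releases it with get() when done,
    # while loader() waits whenever the queue reports full.
    work_queue = queue.Queue(maxsize=10)
    conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, dbname=DB_NAME,
                            user=DB_UNAME)
    try:
        loader(work_queue, conn)  # dates and stock_id default from options
    finally:
        conn.close()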