def init_table(num=3):
    """Create the tweet table, retrying at most *num* times.

    If creation raises sqlite3.OperationalError (e.g. the table already
    exists), the table is dropped and creation is retried.

    BUG FIX: the original `while i < num` loop never incremented `i`, so
    `num` had no effect and a persistently failing CREATE looped forever.
    A bounded `for` loop restores the intended retry limit.
    """
    with sqlite3.connect(DB_NAME) as conn:
        cursor = conn.cursor()
        for _ in range(num):
            try:
                cursor.execute(read_sql("create_tweet_table.sql"))
                break  # table created successfully
            except sqlite3.OperationalError:
                # Drop whatever is in the way, then retry.
                cursor.execute(read_sql("delete_tweet_table.sql"))
def adjustIMSportWallet(SpecifyDate):
    """Patch IM Sport (ProductId 17) balances in the MemberWalletsFlow
    snapshot for *SpecifyDate* and overwrite the snapshot file.

    Bets placed before the local-wallet balance was captured but refunded
    (tie / cancelled: NetAmount == 0, IsSettled == 1) only after the capture
    are missing from the snapshot; their BetAmount is added back here.

    Side effect: rewrites
    ../../share/data_prod/MemberWallets/MemberWalletsFlow<YYYYMMDD>.ft

    FIX: the original accumulated rows with `DataFrame.append` inside a loop,
    which is quadratic and was removed in pandas 2.0; frames are now
    collected in a list and concatenated once with `pd.concat`.
    """
    member_profile = ft.read_dataframe('../../share/data_prod/' + 'member_profile.ft')
    MemberWallets = ft.read_dataframe('../../share/data_prod/MemberWallets/' + 'MemberWalletsFlow' + SpecifyDate.replace('-', '') + '.ft')
    # Fetch IM bet records window-by-window to keep each query small.
    startList = ['2020-02-15', '2020-02-18', '2020-02-21', '2020-02-24', '2020-02-28', '2020-03-02', '2020-03-06']
    endList = ['2020-02-18', '2020-02-21', '2020-02-24', '2020-02-28', '2020-03-02', '2020-03-06', '2020-03-07']
    chunks = []
    for start, end in zip(startList, endList):
        query = '''
        SELECT [BetTime]
              ,[PlayerName]
              ,[BetAmount]
              ,[NetAmount]
              ,[PayoutTime]
        FROM [dbo].[IMSportBetRecord] with(nolock)
        where BetTime between '{}' and '{}' and NetAmount = 0 and IsSettled = 1
        '''.format(start, end)
        chunks.append(utils.read_sql('New', 'dbFetchStakeMain', query))
    IMBetRecord = pd.concat(chunks) if chunks else pd.DataFrame()
    # Need MemberId to join against MemberWallets and obtain the time the
    # local-wallet balance was captured (CreateTime).
    IMBetRecord['lowerName'] = IMBetRecord.PlayerName.str.lower()
    IMBetRecord = IMBetRecord.merge(member_profile[['member_name', 'member_id']].rename(columns={'member_name': 'lowerName', 'member_id': 'MemberId'}), on='lowerName', how='left')
    IMBetRecord = IMBetRecord.merge(MemberWallets.query('ProductId == 17')[['MemberId', 'CreateTime']], on='MemberId', how='left')
    # Some users are not in the snapshot (returning users offline > 1 year
    # are excluded from the balance-capture list) — drop them.
    IMBetRecord = IMBetRecord[~IMBetRecord.CreateTime.isna()]
    # Shift timestamps to UTC+8.
    IMBetRecord['BetTime'] += pd.Timedelta('8 hours')
    IMBetRecord['PayoutTime'] += pd.Timedelta('8 hours')
    # Rows to add back: bet placed before the capture, refund paid after it.
    IMBetRecordNeedAdd = IMBetRecord.query('CreateTime >= BetTime & CreateTime < PayoutTime').groupby('MemberId').BetAmount.sum().reset_index()
    # Apply the corrections to the local-wallet balances.
    for index in IMBetRecordNeedAdd.index:
        member = IMBetRecordNeedAdd.loc[index, 'MemberId']
        amount = IMBetRecordNeedAdd.loc[index, 'BetAmount']
        MemberWallets.loc[(MemberWallets.MemberId == member) & (MemberWallets.ProductId == 17), 'Balance'] += amount
    # Persist the corrected snapshot over the original file.
    MemberWallets.reset_index(drop=True).to_feather('../../share/data_prod/MemberWallets/' + 'MemberWalletsFlow' + SpecifyDate.replace('-', '') + '.ft')
# 中心錢包變動紀錄 MemberAccount = pd.DataFrame() MemberAccountList = os.listdir(pathMemberAccountLog) for name in MemberAccountList: if '.ft' in name: # 2020年02月後才需要,地方錢包金額是從 2020/2/19 之後開始打 if (int(name[-5:-3]) > 1) & (int(name[-9:-5]) >= 2020): MemberAccount = MemberAccount.append(ft.read_dataframe(pathMemberAccountLog + name)) MemberAccount = MemberAccount.query('AddTime < @SpecifyDate') # 遊戲代號資料 query = ''' SELECT ProductId, Name FROM [LckBaseDB].[dbo].[Product] with(nolock) ''' productId = utils.read_sql('IDC_ALL', 'LckBaseDB', query) productId = productId.query('~(ProductId.isin([4, 6, 8, 10, 11, 13, 16, 18, 24, 30]))') # 技術債,過去的命名方式有與遊戲名稱並不統一 productId['lowerName'] = productId.Name.apply(lambda x:x.lower() if x != 'PG' else x) productId.loc[productId.lowerName == '188casino', 'lowerName'] = 'grandsuite' productId.loc[productId.lowerName == 'lbkeno', 'lowerName'] = 'keno' productId.loc[productId.lowerName == 'mscasino', 'lowerName'] = 'msclub' productId.loc[productId.lowerName == '188sport', 'lowerName'] = 'sportsbook' def rawDataAppendNewRegMember(member_profile, MemberWallets, productId, SpecifyDate): # 要補的用戶名單 allMember = list(MemberWallets.MemberId.unique()) lossMember = set(member_profile.query('reg_date >= "2020-02-15" & reg_date < @SpecifyDate').member_id) - set(allMember) # 補上所有 ProductId 的資料
# Rocket.Chat channel used for progress logging.
logChannel = '@數據-Aries'
# Query window for the local-wallet snapshot rows.
yesterday = '2020-02-20'
today = '2020-03-07'
query = '''
SELECT [Id]
      ,[MemberId]
      ,[ProductId]
      ,[CreateTime]
      ,[UpdateTime]
      ,[Balance]
FROM [LckTempDB].[dbo].[MemberWallets] with(nolock)
where CreateTime between '{}' and '{}'
'''.format(yesterday, today)
try:
    log2rocket('撈取地方錢包資料:開始', to=logChannel)
    df = utils.read_sql('IDC_ALL', 'LckTempDB', query)
    log2rocket('撈取地方錢包資料:成功', to=logChannel)
    # Known data errors, patched by hand below.
    log2rocket('修正已知資料錯誤', to=logChannel)
    # Wrong local-wallet balance.
    df.loc[(df.MemberId == 452500) & (df.ProductId == 22), 'Balance'] = 24390
    # Bet placed before the capture, tied (refunded) after it.
    df.loc[(df.MemberId == 156895) & (df.ProductId == 7), 'Balance'] += 4400
    # Poker-tournament prize money.
    df.loc[(df.MemberId == 214590) & (df.ProductId == 7), 'Balance'] += 20000
    # KG personal jackpot win.
    df.loc[(df.MemberId == 185729) & (df.ProductId == 14), 'Balance'] += 40122
    # Output the corrected data.
    try:
        path = '../../share/data_prod/MemberWallets/'
        log2rocket('地方錢包資料落檔:開始', to=logChannel)
def search_all():
    """Return the number of rows currently in the tweet table."""
    with sqlite3.connect(DB_NAME) as conn:
        rows = conn.cursor().execute(read_sql("search_all_tweet_table.sql")).fetchall()
    return len(rows)
def insert(id, title, trans_title, time_str):
    """Insert a single tweet row (id, title, translated title, timestamp)."""
    row = (id, title, trans_title, time_str)
    with sqlite3.connect(DB_NAME) as conn:
        conn.cursor().executemany(read_sql("insert_tweet_table.sql"), [row])
        conn.commit()
""" Script path: s3://621254586315.mach-big-data-edl/glue/read_files.py IAM role: StackSet-airflow-prerequisites-RoleAirflowProduct-12CGR0NSB7T8V Type: Spark Glue version: Spark 2.4, Python 3 ETL language: Python Temporary directory: s3://621254586315.mach-big-data-edl/glue/tmp Python library path: s3://621254586315.mach-big-data-edl/glue/base_jobs.py Number of workers: 2 Job parameters: --s3_path s3://621254586315.mach-big-data-edl/glue/save_data --extra-py-files s3://621254586315.mach-big-data-edl/glue/extra_py_files/python_libs.zip,s3://621254586315.mach-big-data-edl/glue/extra_py_files/jsonschema-3.2.0-py2.py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/attrs-20.3.0-py2.py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/pyrsistent.zip,s3://621254586315.mach-big-data-edl/glue/extra_py_files/importlib_metadata-3.3.0-py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/zipp-3.4.0-py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/typing_extensions-3.7.4.3-py3-none-any.whl --extra-jars s3://621254586315.mach-big-data-edl/glue/extra_jars/json-serde.jar --enable-glue-datacatalog --encryption-type sse-s3 """ from base_jobs import get_values from utils import read_sql, df_to_parquet args, spark_context, glue_context = get_values(['s3_path']) query = """ select * from data_warehouse.users limit 10 """ df = read_sql(spark_context, query) df_to_parquet(df, args['s3_path'])