import sqlite3


def init_table(num=3):
    # Create the tweet table; if it already exists (sqlite3.OperationalError),
    # drop it and try again, up to `num` attempts. (The original while loop
    # never incremented its counter, so a persistent failure looped forever.)
    # DB_NAME and read_sql are defined elsewhere in the original module.
    with sqlite3.connect(DB_NAME) as conn:
        cursor = conn.cursor()
        for _ in range(num):
            try:
                cursor.execute(read_sql("create_tweet_table.sql"))
                break
            except sqlite3.OperationalError:
                cursor.execute(read_sql("delete_tweet_table.sql"))
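The read_sql helper these sqlite snippets rely on is not shown; a minimal sketch of what it might look like, assuming the .sql files sit in a local sql/ directory:

# Hypothetical helper assumed by the sqlite snippets on this page: it loads
# a SQL statement from a file so the queries can live outside the Python code.
from pathlib import Path

SQL_DIR = Path("sql")  # assumed location of the .sql files


def read_sql(filename):
    return (SQL_DIR / filename).read_text(encoding="utf-8")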
Example #2
import pandas as pd
import feather as ft  # feather-format; read_dataframe loads .ft files

import utils  # project-local DB helpers (read_sql)


def adjustIMSportWallet(SpecifyDate):

    member_profile = ft.read_dataframe('../../share/data_prod/' + 'member_profile.ft')
    MemberWallets = ft.read_dataframe('../../share/data_prod/MemberWallets/' + 'MemberWalletsFlow' + SpecifyDate.replace('-', '') + '.ft')

    # Fetch the IM Sport bet records in batches
    frames = []

    startList = ['2020-02-15', '2020-02-18', '2020-02-21', '2020-02-24', '2020-02-28', '2020-03-02', '2020-03-06']
    endList = ['2020-02-18', '2020-02-21', '2020-02-24', '2020-02-28', '2020-03-02', '2020-03-06', '2020-03-07']

    for start, end in zip(startList, endList):

        query = ''' SELECT [BetTime]
                          ,[PlayerName]
                          ,[BetAmount]
                          ,[NetAmount]
                          ,[PayoutTime]
                      FROM [dbo].[IMSportBetRecord] with(nolock)
                      where BetTime between '{}' and '{}'
                        and NetAmount = 0
                        and IsSettled = 1  '''.format(start, end)
        frames.append(utils.read_sql('New', 'dbFetchStakeMain', query))

    # DataFrame.append is deprecated; concatenate the batches instead
    IMBetRecord = pd.concat(frames, ignore_index=True)

    # MemberId is needed to merge with MemberWallets and obtain the time at
    # which the local wallet balance was captured
    IMBetRecord['lowerName'] = IMBetRecord.PlayerName.str.lower()
    IMBetRecord = IMBetRecord.merge(member_profile[['member_name', 'member_id']].rename(columns={'member_name': 'lowerName', 'member_id': 'MemberId'}), on='lowerName', how='left')
    IMBetRecord = IMBetRecord.merge(MemberWallets.query('ProductId == 17')[['MemberId', 'CreateTime']], on='MemberId', how='left')

    # Some users are not on the list (returning users offline for over a year
    # are excluded from the local-wallet-balance calculation)
    IMBetRecord = IMBetRecord[~IMBetRecord.CreateTime.isna()]

    # Shift timestamps to UTC+8
    IMBetRecord['BetTime'] += pd.Timedelta('8 hours')
    IMBetRecord['PayoutTime'] += pd.Timedelta('8 hours')

    # Records we need: bets placed before the local wallet balance was
    # captured, then refunded afterwards as a push or a cancelled round
    IMBetRecordNeedAdd = IMBetRecord.query('CreateTime >= BetTime & CreateTime < PayoutTime').groupby('MemberId').BetAmount.sum().reset_index()

    # Update the local wallet balances
    for index in IMBetRecordNeedAdd.index:
        member = IMBetRecordNeedAdd.loc[index, 'MemberId']
        amount = IMBetRecordNeedAdd.loc[index, 'BetAmount']
        MemberWallets.loc[(MemberWallets.MemberId == member) & (MemberWallets.ProductId == 17), 'Balance'] += amount

    # Write out the updated data
    MemberWallets.reset_index(drop=True).to_feather('../../share/data_prod/MemberWallets/' + 'MemberWalletsFlow' + SpecifyDate.replace('-', '') + '.ft')
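An illustrative call, assuming the feather file for that date already exists; the date is just one from the ranges hard-coded above:

# Example invocation only; '2020-03-06' is an illustrative date.
adjustIMSportWallet('2020-03-06')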
Example #3
# Central wallet change log
frames = []
MemberAccountList = os.listdir(pathMemberAccountLog)
for name in MemberAccountList:
    if '.ft' in name:
        # Only needed from February 2020 onward; local wallet balances
        # started being recorded on 2020/2/19. (The original check
        # `month > 1 & year >= 2020` wrongly skipped January of later years.)
        year, month = int(name[-9:-5]), int(name[-5:-3])
        if year > 2020 or (year == 2020 and month > 1):
            frames.append(ft.read_dataframe(pathMemberAccountLog + name))
# DataFrame.append is deprecated; concatenate the collected frames instead
MemberAccount = pd.concat(frames, ignore_index=True)
MemberAccount = MemberAccount.query('AddTime < @SpecifyDate')


# Product code lookup
query = ''' SELECT ProductId, Name
            FROM [LckBaseDB].[dbo].[Product] with(nolock) '''
productId = utils.read_sql('IDC_ALL', 'LckBaseDB', query)
productId = productId.query('~(ProductId.isin([4, 6, 8, 10, 11, 13, 16, 18, 24, 30]))')
# Technical debt: historical naming is not consistent with the game names
productId['lowerName'] = productId.Name.apply(lambda x: x.lower() if x != 'PG' else x)
productId.loc[productId.lowerName == '188casino', 'lowerName'] = 'grandsuite'
productId.loc[productId.lowerName == 'lbkeno', 'lowerName'] = 'keno'
productId.loc[productId.lowerName == 'mscasino', 'lowerName'] = 'msclub'
productId.loc[productId.lowerName == '188sport', 'lowerName'] = 'sportsbook'

def rawDataAppendNewRegMember(member_profile, MemberWallets, productId, SpecifyDate):

    # List of users that need to be backfilled
    allMember = list(MemberWallets.MemberId.unique())
    lossMember = set(member_profile.query('reg_date >= "2020-02-15" & reg_date < @SpecifyDate').member_id) - set(allMember)

    # Backfill rows for every ProductId
Example #4
logChannel = '@數據-Aries'

yesterday = '2020-02-20'
today = '2020-03-07'

query = ''' SELECT [Id]
      ,[MemberId]
      ,[ProductId]
      ,[CreateTime]
      ,[UpdateTime]
      ,[Balance]
  FROM [LckTempDB].[dbo].[MemberWallets] with(nolock)
  where CreateTime between '{}' and '{}' '''.format(yesterday, today)
try:
    log2rocket('Fetching local wallet data: start', to=logChannel)
    df = utils.read_sql('IDC_ALL', 'LckTempDB', query)
    log2rocket('Fetching local wallet data: success', to=logChannel)
    # Known data errors
    log2rocket('Fixing known data errors', to=logChannel)
    # Incorrect local wallet balance
    df.loc[(df.MemberId == 452500) & (df.ProductId == 22), 'Balance'] = 24390
    # Bet placed before the snapshot, round pushed afterwards
    df.loc[(df.MemberId == 156895) & (df.ProductId == 7), 'Balance'] += 4400
    # Poker tournament prize money
    df.loc[(df.MemberId == 214590) & (df.ProductId == 7), 'Balance'] += 20000
    # KG individual jackpot win
    df.loc[(df.MemberId == 185729) & (df.ProductId == 14), 'Balance'] += 40122
    # Write out the result
    try:
        path = '../../share/data_prod/MemberWallets/'
        log2rocket('Writing local wallet data to file: start', to=logChannel)


def search_all():
    with sqlite3.connect(DB_NAME) as conn:
        cursor = conn.cursor()
        # Count all rows in the tweet table
        targets = cursor.execute(read_sql("search_all_tweet_table.sql")).fetchall()
        return len(targets)


def insert(tweet_id, title, trans_title, time_str):
    with sqlite3.connect(DB_NAME) as conn:
        cursor = conn.cursor()
        # `tweet_id` avoids shadowing the built-in id(); a single row
        # needs execute() rather than executemany()
        data = (tweet_id, title, trans_title, time_str)
        cursor.execute(read_sql("insert_tweet_table.sql"), data)
        conn.commit()
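A quick illustrative round trip through the three sqlite helpers; the tweet values are made up:

# Illustrative usage only; DB_NAME and the .sql files must already exist.
init_table()
insert('1234567890', 'original title', 'translated title', '2020-03-07 12:00')
print(search_all())  # prints the number of stored tweets, here 1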
Example #7
"""
Script path: s3://621254586315.mach-big-data-edl/glue/read_files.py
IAM role: StackSet-airflow-prerequisites-RoleAirflowProduct-12CGR0NSB7T8V
Type: Spark
Glue version: Spark 2.4, Python 3
ETL language: Python
Temporary directory: s3://621254586315.mach-big-data-edl/glue/tmp
Python library path:
s3://621254586315.mach-big-data-edl/glue/base_jobs.py
Number of workers: 2
Job parameters:
--s3_path s3://621254586315.mach-big-data-edl/glue/save_data
--extra-py-files s3://621254586315.mach-big-data-edl/glue/extra_py_files/python_libs.zip,s3://621254586315.mach-big-data-edl/glue/extra_py_files/jsonschema-3.2.0-py2.py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/attrs-20.3.0-py2.py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/pyrsistent.zip,s3://621254586315.mach-big-data-edl/glue/extra_py_files/importlib_metadata-3.3.0-py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/zipp-3.4.0-py3-none-any.whl,s3://621254586315.mach-big-data-edl/glue/extra_py_files/typing_extensions-3.7.4.3-py3-none-any.whl
--extra-jars s3://621254586315.mach-big-data-edl/glue/extra_jars/json-serde.jar
--enable-glue-datacatalog
--encryption-type sse-s3
"""
from base_jobs import get_values
from utils import read_sql, df_to_parquet


args, spark_context, glue_context = get_values(['s3_path'])

query = """
    select * from data_warehouse.users
    limit 10
"""
df = read_sql(spark_context, query)

df_to_parquet(df, args['s3_path'])
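
read_sql and df_to_parquet come from the job's utils module, which is not included here; a minimal sketch of what such helpers might look like for a Spark/Glue job, as an assumption rather than the actual module:

# Hypothetical sketch of the utils helpers; the real module is not shown.
from pyspark.sql import SparkSession


def read_sql(spark_context, query):
    # Wrap the running SparkContext in a SparkSession and execute the SQL
    # against the catalog (the Glue Data Catalog when enabled).
    spark = SparkSession(spark_context)
    return spark.sql(query)


def df_to_parquet(df, s3_path):
    # Write the DataFrame out to the given S3 prefix as Parquet files.
    df.write.mode('overwrite').parquet(s3_path)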