def get_create(tagname, the_type): tag_database = Getconf.readfile().get('dbs', 'tag') the_command = "impala-shell -B -q \" " \ "create external table if not exists %s.%s(user_id String,%s %s) " \ "row format delimited fields terminated by '\001' " \ "stored as parquet tblproperties ('parquet.compress'='SNAPPY'); \"" return the_command % (tag_database, tagname, tagname, the_type)
def fusion_operation(agg_operator, tag_name): tag_database = Getconf.readfile().get('dbs', 'tag') the_insert = "impala-shell -B -q \" insert overwrite table %s.%s " if agg_operator == 0: filed_sql = "select user_id,count(%s) as %s from %s.dws_user_behavior where user_id!='NULL' and behavior_code='%s'" elif agg_operator == 1: filed_sql = "select user_id,sum(%s) as %s from %s.dws_user_behavior where user_id!='NULL' and behavior_code='%s'" elif agg_operator == 2: filed_sql = "select user_id,max(%s) as %s from %s.dws_user_behavior where user_id!='NULL' and behavior_code='%s'" elif agg_operator == 3: filed_sql = "select user_id,min(%s) as %s from %s.dws_user_behavior where user_id!='NULL' and behavior_code='%s'" elif agg_operator == 4: filed_sql = "select user_id,avg(%s) as %s from %s.dws_user_behavior where user_id!='NULL' and behavior_code='%s'" else: TheUtiles.send_email( " %s Script parameter content error,needs to be 0 to 4:(aggOperator=%s)" % (tag_name, agg_operator)) raise RuntimeError( "➤➤➤➤➤Pay attention to: aggOperator parameter content error,needs to be 0 to 4:(aggOperator=%s)" % agg_operator) return the_insert % (tag_database, tag_name) + filed_sql
def get_index_logic(the_params): be_database = Getconf.readfile().get('dbs', 'behavior') body = the_params['body'] event_code = body['eventCode'] propertycode = body['propertyCode'] # 0: 事件发生总次数; 1: 总和; 2: 最大值; 3: 最小值; 4: 均值; 5: 去重数。 agg_operator = body['aggOperator'] conditions = the_params['conditions'] # 与或 condition_operator = the_params['conditionOperator'] tag_name = the_params['tagName'] # 获取源字段的key # the_key = str(event_code) + "_" + str(propertycode) + "_" + str(agg_operator) # source_field = Getconf.get_source_field().get('source_field', str(the_key)) # 拼接逻辑 filed_sql = TheUtiles.fusion_operation(agg_operator, tag_name) filed_sql_time = TheUtiles.join_time(body, filed_sql, tag_name) filed_sql = filed_sql_time[0] % (propertycode, tag_name, be_database, event_code, filed_sql_time[1], filed_sql_time[2]) the_logic = TheUtiles.join_rule(filed_sql, conditions, condition_operator, tag_name) return the_logic
def excute_logic(all_tag): tmp_database = Getconf.readfile().get('dbs', 'UserProfile_tmp') user_database = Getconf.readfile().get('dbs', 'UserProfile') tag_database = Getconf.readfile().get('dbs', 'tag') dim_database = Getconf.readfile().get('dbs', 'user') tmp_dir = Getconf.readfile().get('dir', 'UserProfile_tmp') user_dir = Getconf.readfile().get('dir', 'UserProfile') delete_tmp_table = "impala-shell -q \" drop table %s.dws_user_tag_tmp \"" the_first = os.popen(delete_tmp_table % tmp_database).read() first_tmp = os.popen("hdfs dfs -rm -r %s/dws_user_tag_tmp" % tmp_dir).read() # 获取画像字段及类型 # 临时表,与画像表完全一样,用户、标签、画像 the_create = "impala-shell -B -q \" create external table %s.%s(" t_filed = "" tag_type = "impala-shell -B --output_delimiter '|' -q \"desc %s.dws_user_tag;\"" % user_database name_type = os.popen(tag_type).read() the_list = name_type[:-1].split("\n") the_filed = Getconf.readfile().get('dim', 'filed') exclude_filed = the_filed.split(",") for li in the_list: tag_and_type = li.split("|") if tag_and_type[0] in exclude_filed: t_filed = t_filed + "%s %s," % (tag_and_type[0], tag_and_type[1]) fu = {} for li in the_list: tag_and_type = li.split("|") fu[tag_and_type[0]] = tag_and_type[1] for li in all_tag: t_type = fu[li] t_filed = t_filed + "%s %s," % (li, t_type) for li in the_list: tag_and_type = li.split("|") if tag_and_type[0] not in exclude_filed and tag_and_type[ 0] not in all_tag: t_filed = t_filed + "%s %s," % (tag_and_type[0], tag_and_type[1]) the_create = the_create % ( tmp_database, "dws_user_tag_tmp" ) + t_filed[:-1] + ") row format delimited fields terminated by '\001' stored as parquet tblproperties ('parquet.compress'='SNAPPY'); " # 此处需加入用户维表 merge_sql = " select " the_join = " left join " # 需排除用户维的字段 for mer in exclude_filed: merge_sql = merge_sql + "a.%s," % mer for tag in all_tag: if tag not in exclude_filed: merge_sql = merge_sql + "%s.%s," % (tag, tag) the_join = the_join + " %s.%s on a.user_id=%s.user_id left join " % ( tag_database, tag, tag) # 第三张表 for third in the_list: tag_and_type = third.split("|") third_tag = tag_and_type[0] if third_tag not in exclude_filed and third_tag not in all_tag: merge_sql = merge_sql + "dut.%s," % third_tag the_join = the_join + " %s.dws_user_tag as dut on a.user_id=dut.user_id; \" 2>&1" % user_database merge_sql = merge_sql[:-1] + " from %s.dim_user as a " % dim_database merge_sql = " insert overwrite table %s.dws_user_tag_tmp " % tmp_database + merge_sql + the_join the_last_sql = the_create + merge_sql print(the_last_sql) # 建表并插入数据至临时表 result = os.popen(the_last_sql).read() print(result) if str(result).find("Table has been created") != -1 and str( result).find("ERROR") == -1: # 成功 delete_table = "impala-shell -q \" drop table %s.dws_user_tag \"" % user_database one = os.popen(delete_table).read() if str(one).find("Table has been dropped") == -1: MergeLogic.update_mysql(all_tag) MergeLogic.send_email( 'merge script failed to delete table, Portrait table data not updated' ) raise RuntimeError( "➤➤➤➤➤Pay attention to: Delete the table failed") # 改名,并判断是否有今日数据与三天前副本,有则删 now_time = datetime.datetime.now() three_ago = now_time - datetime.timedelta(days=3) name_suffix = str(now_time)[:10] + "_" + str( now_time)[11:13] + "_" + str(now_time)[14:16] + "_" + str( now_time)[17:19] + "_" + str(now_time)[20:22] # 判断目录存在 is_exit = os.popen( "hadoop fs -find %s/ -iname 'dws_user_tag_%s*'" % (user_dir, str(now_time)[:10])).read() two = os.popen( "hdfs dfs -mv %s/dws_user_tag %s/dws_user_tag_%s;echo $?" % (user_dir, user_dir, name_suffix)).read() exit_list = os.popen( "hadoop fs -find %s/ -iname 'dws_user_tag_%s*'" % (user_dir, str(now_time)[:10])).read() three_exit = os.popen( "hadoop fs -find %s/ -iname 'dws_user_tag_%s*'" % (user_dir, str(three_ago)[:10])).read() if two[:-1] == "0" and is_exit[:-1] != "": # 存在 dir_list = exit_list[:-1].split('\n') dir_one = dir_list[0][-11:-9] + dir_list[0][-8:-6] + dir_list[ 0][-5:-3] + dir_list[0][-2:] dir_two = dir_list[1][-11:-9] + dir_list[1][-8:-6] + dir_list[ 1][-5:-3] + dir_list[1][-2:] rf = "" if int(dir_two) > int(dir_one): rf = os.popen("hdfs dfs -rm -r %s 2>&1" % dir_list[0]).read() else: rf = os.popen("hdfs dfs -rm -r %s 2>&1" % dir_list[1]).read() if str(rf).find("INFO fs.TrashPolicyDefault: Moved") == -1: MergeLogic.send_email('Deleting extra copies today failed') if three_exit != "": # 存在 rm = os.popen("hdfs dfs -rm -r %s 2>&1" % three_exit[:-1]).read() if str(rm).find("INFO fs.TrashPolicyDefault: Moved") == -1: MergeLogic.send_email( 'Failed to delete the first three days of the copy') # 失败则新建表并插入数据 three = os.popen( "impala-shell -q \" ALTER TABLE %s.dws_user_tag_tmp RENAME TO %s.dws_user_tag \" 2>&1" % (tmp_database, user_database)).read() four = os.popen( "hdfs dfs -mv %s/dws_user_tag_tmp %s/dws_user_tag" % (tmp_dir, user_dir)).read() ip = Getconf.readfile().get('hdfs', 'ip') # 映射失败则标签表中无数据 five = os.popen( "impala-shell -q \" alter table %s.dws_user_tag set location 'hdfs://%s:8020%s/dws_user_tag' \" 2>&1" % (user_database, ip, user_dir)).read() MergeLogic.update_mysql(all_tag) if str(three).find("Renaming was successful") == -1 or str( five).find("New location has been set") == -1: MergeLogic.send_email( "The merge script fails,The portrait table will reset to the previous data" ) # 删除画像表,及其目录,新建并插入数据 delete_tag = "impala-shell -q \" drop table %s.dws_user_tag \"" % user_database os.popen(delete_tag) os.popen("hdfs dfs -rm -r %s/dws_user_tag" % user_dir) # 使用load old_tag = os.popen(the_create % "dws_user_tag").read() load_ok = os.popen( "hive -e \"load data inpath '%s/dws_user_tag_%s/*' overwrite into table %s.dws_user_tag\" 2>&1" % (user_dir, name_suffix, user_database)).read() six = os.popen( "impala-shell -q \" refresh %s.dws_user_tag \"" % user_database) sev = os.popen( "hdfs dfs -cp %s/dws_user_tag/* %s/dws_user_tag_%s " % (user_dir, user_dir, name_suffix)) if str(old_tag).find("Table has been created") == -1 or str( load_ok).find("FAILED") != -1: MergeLogic.send_email( 'Reset failed for portrait table!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' ) raise RuntimeError( "➤➤➤➤➤Pay attention to: Reset failed for portrait table!" ) else: # 失败 MergeLogic.update_mysql(all_tag) MergeLogic.send_email( 'Failed to create table or insert data, portrait data is the previous day' ) raise RuntimeError("建表或插入数据失败")
def get_predilection_logic(the_params): tag_database = Getconf.readfile().get('dbs', 'tag') be_database = Getconf.readfile().get('dbs', 'behavior') body = the_params['body'] tag_name = the_params['tagName'] conditions = the_params['conditions'] condition_operator = the_params['conditionOperator'] operator = body['operator'] event_code = body['eventCode'] condition_property = body['conditionProperty'] topn = body['topN'] time_sql = TheUtiles.join_time(body, "", tag_name) time_rule = TheUtiles.join_rule( time_sql[0] % (time_sql[1], time_sql[2]), conditions, condition_operator, tag_name) if operator == 0: target_property = body['targetProperty'] the_insert = "impala-shell -B -q \" insert overwrite table %s.%s " numeric = """ select the_two.user_id ,group_concat(cast(the_two.%s as string),',') as %s from ( select the_one.* from ( select user_id ,%s ,%s ,ROW_NUMBER() over(PARTITION BY user_id order by %s desc) num from %s.dws_user_behavior where behavior_code='%s' %s ) as the_one where one.num<=%s ) as the_two group by the_two.user_id; \" 2>&1 """ the_logic = the_insert % (tag_database, tag_name) + numeric % ( condition_property, tag_name, target_property, condition_property, condition_property, be_database, event_code, time_rule[:-24], topn) elif operator == 1: the_insert = "impala-shell -B -q \" insert overwrite table %s.%s " the_text = """ select the_three.user_id ,group_concat(distinct cast(the_three.%s as string),',') as %s from ( select the_two.* from ( select the_one.* ,DENSE_RANK() over(PARTITION BY the_one.user_id order by the_one.num desc) the_tank from ( select user_id ,%s ,count(%s) over(PARTITION BY user_id,%s) num from %s.dws_user_behavior where behavior_code='%s' %s ) as the_one ) the_two where the_two.the_tank<=%s ) the_three group by the_three.user_id; \" 2>&1 """ the_logic = the_insert % (tag_database, tag_name) + the_text % ( condition_property, tag_name, condition_property, condition_property, condition_property, be_database, event_code, time_rule[:-24], topn) else: TheUtiles.send_email( " %s Script parameter content error,needs to be 0 to 1:(Operator=%s)" % (tag_name, operator)) raise RuntimeError( "➤➤➤➤➤Pay attention to: Operator parameter content error,needs to be 0 to 1:(Operator=%s)" % operator) return the_logic
def merge_logic(the_bool, tag_name, the_type): user_database = Getconf.readfile().get('dbs', 'UserProfile') dim_database = Getconf.readfile().get('dbs', 'user') tag_database = Getconf.readfile().get('dbs', 'tag') tmp_database = Getconf.readfile().get('dbs', 'UserProfile_tmp') tmp_dir = Getconf.readfile().get('dir', 'UserProfile_tmp') user_dir = Getconf.readfile().get('dir', 'UserProfile') # 获取所有标签及其类型,判断有无画像表 tag_type = "impala-shell -B --output_delimiter '|' -q \"desc %s.dws_user_tag;\"" % user_database name_type = os.popen(tag_type).read() the_list = name_type[:-1].split("\n") # 合并sql,此处需加入用户维表 merge_sql = "select " # 需排除的字段 the_filed = Getconf.readfile().get('dim', 'filed') exclude_filed = the_filed.split(",") for mer in exclude_filed: merge_sql = merge_sql + "a.%s," % mer exclude_filed.append(tag_name) for tag in the_list: filed_list = tag.split("|") if filed_list[0] not in exclude_filed: merge_sql = merge_sql + "b.%s," % filed_list[0] if merge_sql.endswith(","): merge_sql = merge_sql + "c.%s from %s.dim_user as a left join %s.dws_user_tag as b " \ "on a.user_id=b.user_id left join %s.%s as c on a.user_id=c.user_id; \" 2>&1" % (tag_name, dim_database, user_database, tag_database, tag_name) else: merge_sql = merge_sql + ",c.%s from %s.dim_user as a left join %s.dws_user_tag as b " \ "on a.user_id=b.user_id left join %s.%s as c on a.user_id=c.user_id; \" 2>&1" % (tag_name, dim_database, user_database, tag_database, tag_name) delete_tmp_table = "impala-shell -q \" drop table %s.dws_user_tag_tmp \"" % tmp_database the_first = os.popen(delete_tmp_table).read() first_tmp = os.popen("hdfs dfs -rm -r %s/dws_user_tag_tmp 2>&1" % tmp_dir).read() the_tmp_create = "impala-shell -B -q \" create external table %s.dws_user_tag_tmp(" % tmp_database # 新建临时表并合并 for li in the_list: tag_and_type = li.split("|") the_tmp_create = the_tmp_create + "%s %s," % (tag_and_type[0], tag_and_type[1]) d = "string" if the_type == 0: d = "double" the_tmp_create = the_tmp_create + "%s %s) row format delimited fields terminated by '\001' " \ "stored as parquet tblproperties ('parquet.compress'='SNAPPY'); " % (tag_name, d) the_tmp_create = the_tmp_create + " insert overwrite table %s.dws_user_tag_tmp " % tmp_database + merge_sql # 新建标签表 tag_create = "impala-shell -B -q \" create external table %s.dws_user_tag(" % user_database for li in the_list: tag_and_type = li.split("|") tag_create = tag_create + "%s %s," % (tag_and_type[0], tag_and_type[1]) tag_create = tag_create[:-1] + ") row format delimited fields terminated by '\001' stored as parquet tblproperties ('parquet.compress'='SNAPPY'); " # 建表并插入数据至临时表 result = os.popen(the_tmp_create).read() if str(result).find("Table has been created") != -1 and str( result).find("ERROR") == -1: # 成功 delete_table = "impala-shell -q \" drop table %s.dws_user_tag \"" % user_database one = os.popen(delete_table).read() if str(one).find("Table has been dropped") == -1: TheUtiles.last_deal(the_bool, tag_name) TheUtiles.send_email( '%s script failed to delete table, this tag will not exist in the portrait table' % tag_name) raise RuntimeError( "➤➤➤➤➤Pay attention to: Delete the table failed") # 改名,并判断是否有今日数据,有则删,合并脚本中一样! now_time = datetime.datetime.now() name_suffix = str(now_time)[:10] + "_" + str( now_time)[11:13] + "_" + str(now_time)[14:16] + "_" + str( now_time)[17:19] + "_" + str(now_time)[20:22] # 判断目录存在 is_exit = os.popen( "hadoop fs -find %s/ -iname 'dws_user_tag_%s*'" % (user_dir, str(now_time)[:10])).read() two = os.popen( "hdfs dfs -mv %s/dws_user_tag %s/dws_user_tag_%s;echo $?" % (user_dir, user_dir, name_suffix)).read() exit_list = os.popen( "hadoop fs -find %s/ -iname 'dws_user_tag_%s*'" % (user_dir, str(now_time)[:10])).read() if two[:-1] == "0" and is_exit[:-1] != "": # 存在 dir_list = exit_list[:-1].split('\n') dir_one = dir_list[0][-11:-9] + dir_list[0][-8:-6] + dir_list[ 0][-5:-3] + dir_list[0][-2:] dir_two = dir_list[1][-11:-9] + dir_list[1][-8:-6] + dir_list[ 1][-5:-3] + dir_list[1][-2:] rf = "" if int(dir_two) > int(dir_one): rf = os.popen("hdfs dfs -rm -r %s 2>&1" % dir_list[0]).read() else: rf = os.popen("hdfs dfs -rm -r %s 2>&1" % dir_list[1]).read() if str(rf).find("INFO fs.TrashPolicyDefault: Moved") == -1: TheUtiles.send_email( "First attempt to delete today's extra copies failed") # 失败则新建表并插入数据 three = os.popen( "impala-shell -q \" ALTER TABLE %s.dws_user_tag_tmp RENAME TO %s.dws_user_tag \" 2>&1" % (tmp_database, user_database)).read() four = os.popen( "hdfs dfs -mv %s/dws_user_tag_tmp %s/dws_user_tag" % (tmp_dir, user_dir)).read() ip = Getconf.readfile().get('hdfs', 'ip') # 映射失败则标签表中无数据 five = os.popen( "impala-shell -q \" alter table %s.dws_user_tag set location 'hdfs://%s:8020%s/dws_user_tag' \" 2>&1" % (user_database, ip, user_dir)).read() TheUtiles.last_deal(the_bool, tag_name) if str(three).find("Renaming was successful") == -1 or str( five).find("New location has been set") == -1: TheUtiles.send_email( "The first merge %s script fails,The portrait table will reset to the previous data" % tag_name) # 删除画像表,及其目录,新建并插入数据 delete_tag = "impala-shell -q \" drop table %s.dws_user_tag \"" % user_database os.popen(delete_tag) os.popen("hdfs dfs -rm -r %s/dws_user_tag" % user_dir) # 使用load old_tag = os.popen(tag_create).read() load_ok = os.popen( "hive -e \"load data inpath '%s/dws_user_tag_%s/*' overwrite into table %s.dws_user_tag\" 2>&1" % (user_dir, name_suffix, user_database)).read() six = os.popen( "impala-shell -q \" refresh %s.dws_user_tag \"" % user_database) sev = os.popen( "hdfs dfs -cp %s/dws_user_tag/* %s/dws_user_tag_%s " % (user_dir, user_dir, name_suffix)) if str(old_tag).find("Table has been created") == -1 or str( load_ok).find("FAILED") != -1: TheUtiles.send_email( 'Reset failed for portrait table!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' ) raise RuntimeError( "➤➤➤➤➤Pay attention to: Reset failed for portrait table!" ) else: # 失败 TheUtiles.last_deal(the_bool, tag_name) TheUtiles.send_email('The first merge %s script fails' % tag_name) raise RuntimeError( "➤➤➤➤➤Pay attention to: The first merge %s script fails" % tag_name)