def extract(self): self.queryData = QueryData.QueryData() self.queryData.connect() result = utils.sqlHandler(queryData=self.queryData, sqlType="query", sql=extract_sql, piece=10000) self.queryData.close() conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get( "mysql", "dbname_2")) conn_load.connect() conn_load.update(delete_before_insert) conn_load.commit() time.sleep(2) for item in result: for key in item: if type(item[key]) == str and item[key] != "null": item[key] = "'" + item[key] + "'" item["create_date"] = time.strftime( "%Y-%m-%d", time.localtime(item["create_time"])) item["create_date"] = "'" + item["create_date"] + "'" conn_load.update(extract_load % item) print("完成合同主题抽取、转换、存储......") conn_load.commit() conn_load.close()
def load(self): # 计算创建日期在三天以内的静态 """ 整体过程是将处理后的主题类数据上传到数据库服务器 """ try: self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2")) self.queryData.connect() conn = QueryData.QueryData() conn.connect() for hash_key in self.emp_result: per_theme_data = self.emp_result[hash_key][0] self.queryData.update(load_sql%per_theme_data) self.queryData.commit() self.calc_static_validity(conn,self.queryData) print ("完成市场主题存储......") # 数据存储之后,validity_for_once字段 self.queryData.close() conn.close() writelogging.logger.info("市场主题数据装载完成,本次%s条记录"%len(self.emp_result)) except Exception as e: writelogging.logger.error("市场主题数据装载失败") self.emp_result = {}
def extract(self): try: self.queryData = QueryData.QueryData() self.queryData.connect() result = utils.sqlHandler(queryData=self.queryData,sqlType="query",sql=extract_sql,piece = 3000) self.queryData.close() except Exception as e: writelogging.logger.error("市场主题数据库连接失败,请检查用户名,密码") try: # 一下开始插入数据到另外一个数据库 self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2")) self.queryData.connect() # 首先清除之前的错误数据 self.queryData.update(delete_before_insert) self.queryData.commit() for item in result: if (not item["status"]): item["status"] ="null" item["emp_status"] = "null" elif (item["status"] in self.emp_status): item["emp_status"] = self.emp_status[item["status"]] else: item["emp_status"] = "null" if (not item["validity"]): item["validity"] = "null" item["state"] = "null" elif ( item["validity"] in self.status_dict): item["state"] = self.status_dict[item["validity"]] else: item["state"] = "null" if item["update_time"] == "null": item["update_time"] = "0000-00-00 00:00:00" if item["create_time"] == "null": item["create_time"] = "0000-00-00 00:00:00" if (item["description"] and (item["description"].find("爷")>=0 or item["description"].find("奶")>=0) ): item["description"] = "非双亲陪同" else: item["description"]=" " # 循环遍历每一项,如果为None,则改为null # for key in item.keys(): # if (not item[key]): # item[key]="null" if (item["validity"]): self.queryData.update(extract_insert_sql%item) self.queryData.commit() self.queryData.close() print ("完成市场主题抽取......") writelogging.logger.info("市场信息表抽取完成,一共%s个记录"%len(result)) except Exception as e: writelogging.logger.error("市场主题数据抽取、数据库连接失败")
def extract(self): queryData = QueryData.QueryData() try: queryData.connect() result = queryData.query(sql) queryData.close() self.result = result except Exception as e: raise
def transform(self): """ 1、从数据表中读取所有员工信息 2、循环遍历每个员工,分别计算每个指标,并存储在字典中,如{"A指标":"##","B指标":"##"} 3、将第2个步骤中的结果存储在数据库中,从而完成转换,插入数据的原则是没有插入,有则修改结果 """ self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2")) self.queryData.connect() # subSql = "select concat(emp_id,date(create_time)) from bi_sale_info where date(update_time) = date_sub(curdate(),interval 1 day)" # result4update = self.queryData.query(subSql) # result4update = [value["concat(emp_id,date(create_time))"] for value in result4update] # result4update = tuple(result4update) _sql = "select * from bi_sale_info where date(create_time) between '%s' and '%s'"%(start_date,end_date) times = 0 while True: # sql = _sql+" or concat(emp_id,date(create_time)) in"+str(result4update)+" limit %s,%s"%(times*limit,limit) result_info = self.queryData.query(_sql+" limit %s, %s"%(times*limit,limit)) length = len(result_info) for entry in result_info: group = "%s%s%s"%(entry["emp_id"],entry["campus_name"],entry["create_time"][0:10]) hl_md5_obj = hashlib.md5(group.encode("utf-8")) # 唯一键,标明订单属于哪一个员工,哪一个校区的订单 unique = hl_md5_obj.hexdigest() if (unique not in self.emp_result.keys()): self.emp_result[unique] =[{\ "emp_id":entry["emp_id"],\ "emp_name":entry["emp_cn_name"],\ "campus_name":entry["campus_name"],\ "create_date":entry["create_time"][0:10],\ "order_total":0,\ "order_valid_num":0,\ "order_invalid_num":0,\ "invite_total":0,\ "order_no_parents":0,\ "contact_total":0\ }] everyGroup = self.emp_result[unique][0] everyGroup["order_total"] +=1 if (entry["order_status"] in ["有效","再联系","邀约成功"]): everyGroup["order_valid_num"] += 1 if (entry["order_status"] in ["无效"]): everyGroup["order_invalid_num"] += 1 if (entry["order_status"] in ["邀约成功"]): everyGroup["invite_total"] += 1 if (entry["order_status"] in ["再联系"]): everyGroup["contact_total"] += 1 if (entry["description"] and entry["description"].find("非双亲")>=0 ): everyGroup["order_no_parents"] += 1 times+=1 time.sleep(1) if length < limit: print ("完成市场主题转换......") self.queryData.close() break
def extract(self): delete_before_insert = "delete from bi_visit_info where date(visit_time) between '%s' and '%s' " % ( start_date, end_date) load_sql = "insert into bi_visit_info(student_id,visit_campus,visit_time,invite_role_id,department_name,v_ftm_id\ ) values(%(student_id)s,'%(visit_campus)s','%(visit_time)s',%(invite_role_id)s,'%(department_name)s','%(v_ftm_id)s')\ on duplicate key update student_id=values(student_id),visit_campus=values(visit_campus),invite_role_id=values(invite_role_id),department_name=values(department_name),v_ftm_id=values(v_ftm_id)" self.queryData = QueryData.QueryData() self.queryData.connect() resultSet = self.queryData.query(extract_sql) resultSet = self.getTarget(resultSet) conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get( "mysql", "dbname_2")) conn_load.connect() conn_load.update(delete_before_insert) conn_load.commit() for item in resultSet: # student_id,visit_campus,invite_role_id,creator_role_id department_name = self.queryDept(item["invite_role_id"]) time.sleep(0.2) item["department_name"] = "" if department_name and department_name.get("data"): item["department_name"] = department_name["data"][0] if (not item["visit_campus"]): item["visit_campus"] = item["source_campus"] v_ftm_list = self.queryData.query( "select v_ftm from v_student where student_id=%(student_id)s" % item) item["v_ftm_id"] = "" v_ftm = v_ftm_list[0].get("v_ftm") if v_ftm: item["v_ftm_id"] = v_ftm conn_load.update(load_sql % item) print("完成visit主题抽取......") conn_load.commit() conn_load.close() self.queryData.close()
def extract(self): self.queryData = QueryData.QueryData() self.queryData.connect() conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get( "mysql", "dbname_2")) conn_load.connect() conn_load.update(delete_before_insert) conn_load.commit() times = 0 while True: result = self.queryData.query( extract_sql % (start_date, end_date, times * limit)) total = len(result) print("Tmk抽取数量 %s" % total) for entry in result: # 处理需要转换的数据 if entry["dept_id"] == 74: entry["group_name"] = "成人组" elif entry["dept_id"] == 75: entry["group_name"] = "少儿组" elif entry["dept_id"] == 76: entry["group_name"] = "跟进组" else: entry["group_name"] = "null" for field in entry: value = entry[field] if (type(value) == str and value != "null"): entry[field] = "'" + value + "'" conn_load.update(extract_load % entry) times += 1 conn_load.commit() time.sleep(1) if (total < limit): print("完成TMK主题抽取......") self.queryData.close() conn_load.close() break
def load(self): # delete_before_insert = "delete from bi_visit_theme where visit_date between '%s' and '%s' "%(start_date,end_date) load_sql = "insert into bi_visit_theme(visit_campus,visit_num,visit_date) \ values(%(visit_campus)s,%(visit_num)s,%(visit_date)s)\ on duplicate key update visit_num = values(visit_num)" conn = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get("mysql", "dbname_2")) conn.connect() for hash_key in self.visit_result: per_theme_data = self.visit_result[hash_key][0] for field in per_theme_data: if type(per_theme_data[field] ) == str and per_theme_data[field] != "null": per_theme_data[field] = "'" + per_theme_data[field] + "'" conn.update(load_sql % per_theme_data) self.visit_result = {} print("完成visit主题存储......") conn.commit() conn.close()
def transform(self): self.queryData = QueryData.QueryData( host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get("mysql", "dbname_2")) self.queryData.connect() sql = "select * from bi_visit_info where date(visit_time) between '%s' and '%s' " % ( start_date, end_date) resultSet = self.queryData.query(sql) for entry in resultSet: source_data = "%s%s" % (entry["visit_campus"], entry["visit_time"][0:10]) source_hash = hashlib.md5(source_data.encode("utf-8")).hexdigest() if self.visit_result.get(source_hash) == None: self.visit_result[source_hash]=[{\ "visit_date":entry["visit_time"][0:10],\ "visit_campus":entry["visit_campus"],\ "visit_num":0\ }] load_entry = self.visit_result[source_hash][0] load_entry["visit_num"] += 1 print("完成visit主题转换......")
def load(self): load_insert = "insert into bi_tmk_theme(\ emp_id,emp_name,group_name,order_total,order_new,order_old,campus_name,invite_success_total,\ contact_total,order_invalid_num,calculate_date,order_unknown_num,order_valid_num)\ values(%(emp_id)s,%(emp_name)s,%(group_name)s,%(order_total)s,%(order_new)s,%(order_old)s,\ %(campus_name)s,%(invite_success_total)s,%(contact_total)s,%(order_invalid_num)s,%(calculate_date)s,\ %(order_unknown_num)s,%(order_valid_num)s)\ on duplicate key update emp_name=values(emp_name),\ group_name=values(group_name),\ order_total=values(order_total),\ order_new=values(order_new),\ order_old=values(order_old),\ campus_name=values(campus_name),\ invite_success_total=values(invite_success_total),\ contact_total=values(contact_total),\ order_invalid_num=values(order_invalid_num),\ order_unknown_num=values(order_unknown_num),\ order_valid_num=values(order_valid_num)" conn = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get("mysql", "dbname_2")) conn.connect() # 目标表中,如果员工号,校区,时间,有一项不同,就需要insert,完全相同则是update for hash_key in self.tmk_result: per_theme_data = self.tmk_result[hash_key][0] for field in per_theme_data: if type(per_theme_data[field] ) == str and per_theme_data[field] != "null": per_theme_data[field] = "'" + per_theme_data[field] + "'" conn.update(load_insert % per_theme_data) conn.commit() print("完成TMK主题存储......") conn.close() writelogging.logger.info("TMK主题数据装载结束,本次%s条记录" % len(self.tmk_result)) self.tmk_result = {}
def transform(self): sql = "select * from bi_tmk_info\ where date(update_time) = '%s'" unique = {} # 用来查询有没有更早的新单 self.queryData = QueryData.QueryData() self.queryData.connect() conn_query = QueryData.QueryData(host=self.cp.get("mysql", "host_2"), user=self.cp.get("mysql", "user_2"), pwd=self.cp.get("mysql", "pwd_2"), dbname=self.cp.get( "mysql", "dbname_2")) conn_query.connect() # subSql = "select concat(emp_id,date(create_time)) from bi_tmk_info where date(update_time) = date_sub(curdate(),interval 1 day)" # result4update = conn_query.query(subSql) # result4update = [value["concat(emp_id,date(create_time))"] for value in result4update] # # 避免因为一个结果生成类似("",)的元组,在sql中会产生错误 # result4update.extend(["查询无结果"+str(datetime.datetime.now())]*2) # result4update = tuple(result4update) global start_date while start_date <= end_date: result = conn_query.query(sql % start_date) unique.update(self.dataDistinct(result)) start_date = datetime.datetime.strptime( start_date, "%Y-%m-%d") + datetime.timedelta(days=1) start_date = start_date.strftime("%Y-%m-%d") time.sleep(1) # 遍历所有记录,去最新数据,当天多个电话去重 for index, _entry in enumerate(unique.values()): _entry = _entry[0] source_data = ("%s%s%s" % (_entry["emp_id"], _entry["campus_name"], _entry["update_time"][0:10])) source_hash = hashlib.md5(source_data.encode("utf-8")).hexdigest() if source_hash not in self.tmk_result: self.tmk_result[source_hash] = [{\ "emp_id":_entry["emp_id"],\ "emp_name":_entry["emp_name"],\ "calculate_date":_entry["update_time"][0:10],\ "campus_name":_entry["campus_name"],\ "group_name":_entry["group_name"],\ "order_total":0,\ "order_new":0,\ "order_old":0,\ "invite_success_total":0,\ "contact_total":0,\ "order_invalid_num":0,\ "order_valid_num":0,\ "order_unknown_num":0\ }] load_entry = self.tmk_result[source_hash][0] load_entry["order_total"] += 1 if _entry["validity"] in ["邀约成功"]: load_entry["invite_success_total"] += 1 if _entry["validity"] in ["再联系"]: load_entry["contact_total"] += 1 if _entry["validity"] in ["无效"]: load_entry["order_invalid_num"] += 1 if _entry["validity"] in ["未知"]: load_entry["order_unknown_num"] += 1 if _entry["validity"] in ["有效"]: load_entry["order_valid_num"] += 1 result = self.queryData.query( "select count(*) count from v_invitation where student_id=%s and date(s_d_time)<'%s'" % (_entry["student_id"], _entry["update_time"][0:10])) if (not result[0]["count"]): load_entry["order_new"] += 1 load_entry["order_old"] = load_entry["order_total"] - load_entry[ "order_new"] self.queryData.close() conn_query.close() print("完成tmk主题转换")