Beispiel #1
0
 def extract(self):
     self.queryData = QueryData.QueryData()
     self.queryData.connect()
     result = utils.sqlHandler(queryData=self.queryData,
                               sqlType="query",
                               sql=extract_sql,
                               piece=10000)
     self.queryData.close()
     conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                     user=self.cp.get("mysql", "user_2"),
                                     pwd=self.cp.get("mysql", "pwd_2"),
                                     dbname=self.cp.get(
                                         "mysql", "dbname_2"))
     conn_load.connect()
     conn_load.update(delete_before_insert)
     conn_load.commit()
     time.sleep(2)
     for item in result:
         for key in item:
             if type(item[key]) == str and item[key] != "null":
                 item[key] = "'" + item[key] + "'"
         item["create_date"] = time.strftime(
             "%Y-%m-%d", time.localtime(item["create_time"]))
         item["create_date"] = "'" + item["create_date"] + "'"
         conn_load.update(extract_load % item)
     print("完成合同主题抽取、转换、存储......")
     conn_load.commit()
     conn_load.close()
Beispiel #2
0
    def load(self):
        # 计算创建日期在三天以内的静态

        """
            整体过程是将处理后的主题类数据上传到数据库服务器
        """
        try:
            self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2"))
            self.queryData.connect()
            conn = QueryData.QueryData()
            conn.connect()
            for hash_key in self.emp_result:
                per_theme_data = self.emp_result[hash_key][0]
                self.queryData.update(load_sql%per_theme_data)

            self.queryData.commit()
            self.calc_static_validity(conn,self.queryData)
            print ("完成市场主题存储......")
            # 数据存储之后,validity_for_once字段
            self.queryData.close()
            conn.close()
            writelogging.logger.info("市场主题数据装载完成,本次%s条记录"%len(self.emp_result))
        except Exception as e:
            writelogging.logger.error("市场主题数据装载失败")
        self.emp_result = {}
Beispiel #3
0
    def extract(self):
        try:
            self.queryData = QueryData.QueryData()
            self.queryData.connect()
            result = utils.sqlHandler(queryData=self.queryData,sqlType="query",sql=extract_sql,piece = 3000)
            self.queryData.close()
        except Exception as e:
            writelogging.logger.error("市场主题数据库连接失败,请检查用户名,密码")
        try:
            # 一下开始插入数据到另外一个数据库
            self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2"))
            self.queryData.connect()
            # 首先清除之前的错误数据
            self.queryData.update(delete_before_insert)
            self.queryData.commit()
            for item in result:
                if (not item["status"]):
                    item["status"] ="null"
                    item["emp_status"] = "null"
                elif (item["status"] in self.emp_status):
                    item["emp_status"] = self.emp_status[item["status"]]
                else:
                    item["emp_status"] = "null"
                if (not item["validity"]):
                    item["validity"] = "null"
                    item["state"] = "null"
                elif ( item["validity"] in self.status_dict):
                    item["state"] = self.status_dict[item["validity"]]
                else:
                    item["state"] = "null"
                if item["update_time"] == "null":
                    item["update_time"] = "0000-00-00 00:00:00"
                if item["create_time"] == "null":
                    item["create_time"] = "0000-00-00 00:00:00"

                if (item["description"] and (item["description"].find("爷")>=0 or item["description"].find("奶")>=0) ):
                    item["description"] = "非双亲陪同"
                else:
                    item["description"]=" "
                # 循环遍历每一项,如果为None,则改为null
                # for key in item.keys():
                #     if (not item[key]):
                #         item[key]="null"

                if (item["validity"]):
                    self.queryData.update(extract_insert_sql%item)
            self.queryData.commit()
            self.queryData.close()
            print ("完成市场主题抽取......")
            writelogging.logger.info("市场信息表抽取完成,一共%s个记录"%len(result))
        except Exception as e:
            writelogging.logger.error("市场主题数据抽取、数据库连接失败")
Beispiel #4
0
 def extract(self):
     queryData = QueryData.QueryData()
     try:
         queryData.connect()
         result = queryData.query(sql)
         queryData.close()
         self.result = result
     except Exception as e:
         raise
Beispiel #5
0
 def transform(self):
     """
         1、从数据表中读取所有员工信息
         2、循环遍历每个员工,分别计算每个指标,并存储在字典中,如{"A指标":"##","B指标":"##"}
         3、将第2个步骤中的结果存储在数据库中,从而完成转换,插入数据的原则是没有插入,有则修改结果
     """
     self.queryData = QueryData.QueryData(host=self.cp.get("mysql","host_2"),user=self.cp.get("mysql","user_2"),pwd=self.cp.get("mysql","pwd_2"),dbname=self.cp.get("mysql","dbname_2"))
     self.queryData.connect()
     # subSql = "select concat(emp_id,date(create_time)) from bi_sale_info where date(update_time) = date_sub(curdate(),interval 1 day)"
     # result4update  = self.queryData.query(subSql)
     # result4update = [value["concat(emp_id,date(create_time))"] for value in result4update]
     # result4update = tuple(result4update)
     _sql = "select * from bi_sale_info where date(create_time) between '%s' and '%s'"%(start_date,end_date)
     times = 0
     while True:
         # sql = _sql+" or concat(emp_id,date(create_time)) in"+str(result4update)+" limit %s,%s"%(times*limit,limit)
         result_info = self.queryData.query(_sql+" limit %s, %s"%(times*limit,limit))
         length = len(result_info)
         for entry in result_info:
             group = "%s%s%s"%(entry["emp_id"],entry["campus_name"],entry["create_time"][0:10])
             hl_md5_obj = hashlib.md5(group.encode("utf-8"))
             # 唯一键,标明订单属于哪一个员工,哪一个校区的订单
             unique = hl_md5_obj.hexdigest()
             if (unique not in self.emp_result.keys()):
                 self.emp_result[unique] =[{\
                     "emp_id":entry["emp_id"],\
                     "emp_name":entry["emp_cn_name"],\
                     "campus_name":entry["campus_name"],\
                     "create_date":entry["create_time"][0:10],\
                     "order_total":0,\
                     "order_valid_num":0,\
                     "order_invalid_num":0,\
                     "invite_total":0,\
                     "order_no_parents":0,\
                     "contact_total":0\
                 }]
             everyGroup = self.emp_result[unique][0]
             everyGroup["order_total"] +=1
             if (entry["order_status"] in ["有效","再联系","邀约成功"]):
                 everyGroup["order_valid_num"] += 1
             if (entry["order_status"] in ["无效"]):
                 everyGroup["order_invalid_num"] += 1
             if (entry["order_status"] in ["邀约成功"]):
                 everyGroup["invite_total"] += 1
             if (entry["order_status"] in ["再联系"]):
                 everyGroup["contact_total"] += 1
             if (entry["description"] and entry["description"].find("非双亲")>=0 ):
                 everyGroup["order_no_parents"] += 1
         times+=1
         time.sleep(1)
         if length < limit:
             print ("完成市场主题转换......")
             self.queryData.close()
             break
    def extract(self):
        delete_before_insert = "delete from bi_visit_info where date(visit_time) between '%s' and '%s' " % (
            start_date, end_date)
        load_sql = "insert into bi_visit_info(student_id,visit_campus,visit_time,invite_role_id,department_name,v_ftm_id\
        ) values(%(student_id)s,'%(visit_campus)s','%(visit_time)s',%(invite_role_id)s,'%(department_name)s','%(v_ftm_id)s')\
                    on duplicate key update student_id=values(student_id),visit_campus=values(visit_campus),invite_role_id=values(invite_role_id),department_name=values(department_name),v_ftm_id=values(v_ftm_id)"

        self.queryData = QueryData.QueryData()
        self.queryData.connect()
        resultSet = self.queryData.query(extract_sql)
        resultSet = self.getTarget(resultSet)

        conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                        user=self.cp.get("mysql", "user_2"),
                                        pwd=self.cp.get("mysql", "pwd_2"),
                                        dbname=self.cp.get(
                                            "mysql", "dbname_2"))
        conn_load.connect()
        conn_load.update(delete_before_insert)
        conn_load.commit()
        for item in resultSet:
            # student_id,visit_campus,invite_role_id,creator_role_id
            department_name = self.queryDept(item["invite_role_id"])
            time.sleep(0.2)
            item["department_name"] = ""
            if department_name and department_name.get("data"):
                item["department_name"] = department_name["data"][0]
            if (not item["visit_campus"]):
                item["visit_campus"] = item["source_campus"]
            v_ftm_list = self.queryData.query(
                "select v_ftm from v_student where student_id=%(student_id)s" %
                item)
            item["v_ftm_id"] = ""
            v_ftm = v_ftm_list[0].get("v_ftm")
            if v_ftm:
                item["v_ftm_id"] = v_ftm
            conn_load.update(load_sql % item)
        print("完成visit主题抽取......")
        conn_load.commit()
        conn_load.close()
        self.queryData.close()
Beispiel #7
0
 def extract(self):
     self.queryData = QueryData.QueryData()
     self.queryData.connect()
     conn_load = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                     user=self.cp.get("mysql", "user_2"),
                                     pwd=self.cp.get("mysql", "pwd_2"),
                                     dbname=self.cp.get(
                                         "mysql", "dbname_2"))
     conn_load.connect()
     conn_load.update(delete_before_insert)
     conn_load.commit()
     times = 0
     while True:
         result = self.queryData.query(
             extract_sql % (start_date, end_date, times * limit))
         total = len(result)
         print("Tmk抽取数量  %s" % total)
         for entry in result:
             # 处理需要转换的数据
             if entry["dept_id"] == 74:
                 entry["group_name"] = "成人组"
             elif entry["dept_id"] == 75:
                 entry["group_name"] = "少儿组"
             elif entry["dept_id"] == 76:
                 entry["group_name"] = "跟进组"
             else:
                 entry["group_name"] = "null"
             for field in entry:
                 value = entry[field]
                 if (type(value) == str and value != "null"):
                     entry[field] = "'" + value + "'"
             conn_load.update(extract_load % entry)
         times += 1
         conn_load.commit()
         time.sleep(1)
         if (total < limit):
             print("完成TMK主题抽取......")
             self.queryData.close()
             conn_load.close()
             break
    def load(self):
        # delete_before_insert = "delete from bi_visit_theme where visit_date between '%s' and '%s' "%(start_date,end_date)
        load_sql = "insert into bi_visit_theme(visit_campus,visit_num,visit_date) \
                                values(%(visit_campus)s,%(visit_num)s,%(visit_date)s)\
                                on duplicate key update visit_num = values(visit_num)"

        conn = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                   user=self.cp.get("mysql", "user_2"),
                                   pwd=self.cp.get("mysql", "pwd_2"),
                                   dbname=self.cp.get("mysql", "dbname_2"))
        conn.connect()
        for hash_key in self.visit_result:
            per_theme_data = self.visit_result[hash_key][0]
            for field in per_theme_data:
                if type(per_theme_data[field]
                        ) == str and per_theme_data[field] != "null":
                    per_theme_data[field] = "'" + per_theme_data[field] + "'"
            conn.update(load_sql % per_theme_data)
        self.visit_result = {}
        print("完成visit主题存储......")
        conn.commit()
        conn.close()
 def transform(self):
     self.queryData = QueryData.QueryData(
         host=self.cp.get("mysql", "host_2"),
         user=self.cp.get("mysql", "user_2"),
         pwd=self.cp.get("mysql", "pwd_2"),
         dbname=self.cp.get("mysql", "dbname_2"))
     self.queryData.connect()
     sql = "select * from bi_visit_info where date(visit_time)  between '%s' and '%s' " % (
         start_date, end_date)
     resultSet = self.queryData.query(sql)
     for entry in resultSet:
         source_data = "%s%s" % (entry["visit_campus"],
                                 entry["visit_time"][0:10])
         source_hash = hashlib.md5(source_data.encode("utf-8")).hexdigest()
         if self.visit_result.get(source_hash) == None:
             self.visit_result[source_hash]=[{\
                     "visit_date":entry["visit_time"][0:10],\
                     "visit_campus":entry["visit_campus"],\
                     "visit_num":0\
             }]
         load_entry = self.visit_result[source_hash][0]
         load_entry["visit_num"] += 1
     print("完成visit主题转换......")
Beispiel #10
0
    def load(self):
        load_insert = "insert into bi_tmk_theme(\
        emp_id,emp_name,group_name,order_total,order_new,order_old,campus_name,invite_success_total,\
        contact_total,order_invalid_num,calculate_date,order_unknown_num,order_valid_num)\
        values(%(emp_id)s,%(emp_name)s,%(group_name)s,%(order_total)s,%(order_new)s,%(order_old)s,\
        %(campus_name)s,%(invite_success_total)s,%(contact_total)s,%(order_invalid_num)s,%(calculate_date)s,\
        %(order_unknown_num)s,%(order_valid_num)s)\
        on duplicate key update emp_name=values(emp_name),\
                                group_name=values(group_name),\
                                order_total=values(order_total),\
                                order_new=values(order_new),\
                                order_old=values(order_old),\
                                campus_name=values(campus_name),\
                                invite_success_total=values(invite_success_total),\
                                contact_total=values(contact_total),\
                                order_invalid_num=values(order_invalid_num),\
                                order_unknown_num=values(order_unknown_num),\
                                order_valid_num=values(order_valid_num)"

        conn = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                   user=self.cp.get("mysql", "user_2"),
                                   pwd=self.cp.get("mysql", "pwd_2"),
                                   dbname=self.cp.get("mysql", "dbname_2"))
        conn.connect()
        # 目标表中,如果员工号,校区,时间,有一项不同,就需要insert,完全相同则是update
        for hash_key in self.tmk_result:
            per_theme_data = self.tmk_result[hash_key][0]
            for field in per_theme_data:
                if type(per_theme_data[field]
                        ) == str and per_theme_data[field] != "null":
                    per_theme_data[field] = "'" + per_theme_data[field] + "'"
            conn.update(load_insert % per_theme_data)
        conn.commit()
        print("完成TMK主题存储......")
        conn.close()
        writelogging.logger.info("TMK主题数据装载结束,本次%s条记录" % len(self.tmk_result))
        self.tmk_result = {}
Beispiel #11
0
    def transform(self):
        sql = "select * from bi_tmk_info\
            where date(update_time) = '%s'"

        unique = {}
        # 用来查询有没有更早的新单
        self.queryData = QueryData.QueryData()
        self.queryData.connect()
        conn_query = QueryData.QueryData(host=self.cp.get("mysql", "host_2"),
                                         user=self.cp.get("mysql", "user_2"),
                                         pwd=self.cp.get("mysql", "pwd_2"),
                                         dbname=self.cp.get(
                                             "mysql", "dbname_2"))
        conn_query.connect()
        # subSql = "select concat(emp_id,date(create_time)) from bi_tmk_info where date(update_time) = date_sub(curdate(),interval 1 day)"
        # result4update  = conn_query.query(subSql)
        # result4update = [value["concat(emp_id,date(create_time))"] for value in result4update]
        # # 避免因为一个结果生成类似("",)的元组,在sql中会产生错误
        # result4update.extend(["查询无结果"+str(datetime.datetime.now())]*2)
        # result4update = tuple(result4update)
        global start_date
        while start_date <= end_date:
            result = conn_query.query(sql % start_date)
            unique.update(self.dataDistinct(result))
            start_date = datetime.datetime.strptime(
                start_date, "%Y-%m-%d") + datetime.timedelta(days=1)
            start_date = start_date.strftime("%Y-%m-%d")
            time.sleep(1)

        # 遍历所有记录,去最新数据,当天多个电话去重
        for index, _entry in enumerate(unique.values()):
            _entry = _entry[0]
            source_data = ("%s%s%s" % (_entry["emp_id"], _entry["campus_name"],
                                       _entry["update_time"][0:10]))
            source_hash = hashlib.md5(source_data.encode("utf-8")).hexdigest()
            if source_hash not in self.tmk_result:
                self.tmk_result[source_hash] = [{\
                    "emp_id":_entry["emp_id"],\
                    "emp_name":_entry["emp_name"],\
                    "calculate_date":_entry["update_time"][0:10],\
                    "campus_name":_entry["campus_name"],\
                    "group_name":_entry["group_name"],\
                    "order_total":0,\
                    "order_new":0,\
                    "order_old":0,\
                    "invite_success_total":0,\
                    "contact_total":0,\
                    "order_invalid_num":0,\
                    "order_valid_num":0,\
                    "order_unknown_num":0\
                }]
            load_entry = self.tmk_result[source_hash][0]
            load_entry["order_total"] += 1
            if _entry["validity"] in ["邀约成功"]:
                load_entry["invite_success_total"] += 1
            if _entry["validity"] in ["再联系"]:
                load_entry["contact_total"] += 1
            if _entry["validity"] in ["无效"]:
                load_entry["order_invalid_num"] += 1
            if _entry["validity"] in ["未知"]:
                load_entry["order_unknown_num"] += 1
            if _entry["validity"] in ["有效"]:
                load_entry["order_valid_num"] += 1
            result = self.queryData.query(
                "select count(*) count from v_invitation where student_id=%s and date(s_d_time)<'%s'"
                % (_entry["student_id"], _entry["update_time"][0:10]))
            if (not result[0]["count"]):
                load_entry["order_new"] += 1
            load_entry["order_old"] = load_entry["order_total"] - load_entry[
                "order_new"]
        self.queryData.close()
        conn_query.close()
        print("完成tmk主题转换")