def import_data(date, side, path):
    """
    [email protected] 2016-07-04: web-search PV data could not be fetched,
    so the logic was rewritten.
    :param date:
    :param side:
    :param path:
    :return:
    """
    pv_ftp = convert_path(source_config.SPO_SRC[side]["pv"], date)
    srcid_ftp = convert_path(source_config.SPO_SRC[side]["srcid"], date)
    pv_path = os.path.join(path, "pv.%s" % source_config.SIDE_NAME[side])
    srcid_path = os.path.join(path, "srcid.%s" % source_config.SIDE_NAME[side])
    pv_success = True
    try:
        # fetch the web-search PV total
        tools.wget(pv_ftp, pv_path)
    except Exception:
        logging.info(u"failed to download PV data!")
        pv_success = False
    try:
        tools.wget(srcid_ftp, srcid_path)
    except Exception:
        logging.info(u"failed to download srcid data!")
        return
    if pv_success:
        pv = float(open(pv_path).read().rstrip("\r\n"))
    else:
        pv = None
    srcid_query_map, srcid_pv_map, srcid_effect_map = build_srcid_map(
        srcid_path, pv)
    stat_db = db.SaveDataBase(date, side)
    stat_db.clear_spo_srcid_stat()
    stat_db.save_spo_srcid_stat("srcid_pv", srcid_pv_map.items())
    logging.info("srcid_pv number:%s" % len(srcid_pv_map))
    if srcid_effect_map:
        stat_db.save_spo_srcid_stat("srcid_effect", srcid_effect_map.items())
        logging.info("srcid_effect number:%s" % len(srcid_effect_map))
    else:
        logging.info("there is no srcid_effect data")
    stat_db.clear_spo_query_stat()
    query_stat_list = []
    for srcid, query_pv_list in srcid_query_map.items():
        for query_pv in query_pv_list:
            query_stat_list.append([srcid, query_pv["query"], query_pv["pv"]])
    logging.info("query stat number:%s" % len(query_stat_list))
    stat_db.save_spo_query_stat(query_stat_list)
    stat_db.close()
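
# convert_path and build_srcid_map are helpers defined elsewhere in this
# repo. Below is a minimal sketch of what build_srcid_map might look like,
# assuming the srcid file carries tab-separated "srcid<TAB>query<TAB>pv"
# lines; the field layout and the pv-share "effect" computation are
# illustrative assumptions, not the actual format.
def build_srcid_map_sketch(srcid_path, total_pv):
    srcid_query_map = {}   # srcid -> [{"query": ..., "pv": ...}, ...]
    srcid_pv_map = {}      # srcid -> summed pv
    srcid_effect_map = {}  # srcid -> share of total pv, if total_pv is known
    with open(srcid_path) as fp:
        for line in fp:
            fields = line.rstrip("\r\n").decode("utf-8").split("\t")
            if len(fields) < 3:
                continue
            srcid, query, pv = fields[0], fields[1], int(fields[2])
            srcid_query_map.setdefault(srcid, []).append(
                {"query": query, "pv": pv})
            srcid_pv_map[srcid] = srcid_pv_map.get(srcid, 0) + pv
    if total_pv:
        for srcid in srcid_pv_map:
            srcid_effect_map[srcid] = round(srcid_pv_map[srcid] / total_pv, 6)
    return srcid_query_map, srcid_pv_map, srcid_effect_map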
def import_data(side, date, path, source, product_id):
    try:
        tools.wget(source, path)
    except Exception:
        logging.info(u"download failed!")
        return
    stat_map = {}
    position_list = []
    url_list = []
    reg = re.compile(r"^\[(\w+)\]$")
    this_mod = None
    with open(path) as fp:
        for line in fp:
            line = line.rstrip("\r\n").decode("utf-8")
            match = reg.match(line)
            if match:
                this_mod = match.group(1)
                if this_mod not in ("index", "position", "detail"):
                    logging.info("unknown mod: %s" % this_mod)
                # a section header carries no data of its own
                continue
            if this_mod == "index":
                index_parse(line, stat_map)
            elif this_mod == "position":
                position_parse(line, position_list)
            elif this_mod == "detail":
                detail_parse(line, url_list)
    # keep only the first 50 entries of position_list
    position_list.sort(key=lambda x: x[0])
    position_list = position_list[:50]
    # deduplicate url_list (currently disabled)
    # url_map = {url[0]: url for url in url_list}
    # url_list = url_map.values()
    # import into the database
    stat_db = db.SaveDataBase(date, side)
    logging.info("stat number:%s" % len(stat_map))
    stat_db.clear_midpage_stat(product_id)
    stat_db.save_midpage_stat(product_id, stat_map.items())
    logging.info("position number:%s" % len(position_list))
    stat_db.clear_midpage_position_stat(product_id)
    stat_db.save_midpage_position_stat(product_id, position_list)
    logging.info("url number:%s" % len(url_list))
    stat_db.clear_midpage_url_stat(product_id)
    stat_db.save_midpage_url_stat(product_id, url_list)
    stat_db.close()
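
# index_parse / position_parse / detail_parse are defined elsewhere; the
# sketches below show one plausible shape, assuming each section carries
# tab-separated lines ("key<TAB>value" under [index], "position<TAB>pv"
# under [position], "url<TAB>pv" under [detail]). Every field layout here
# is an assumption for illustration, not the real input format.
def index_parse_sketch(line, stat_map):
    fields = line.split("\t")
    if len(fields) >= 2:
        stat_map[fields[0]] = fields[1]

def position_parse_sketch(line, position_list):
    fields = line.split("\t")
    if len(fields) >= 2:
        # stored as [position, pv]; position_list.sort(key=lambda x: x[0])
        # above then orders entries by the first field
        position_list.append([int(fields[0]), fields[1]])

def detail_parse_sketch(line, url_list):
    fields = line.split("\t")
    if len(fields) >= 2:
        url_list.append([fields[0], fields[1]])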
def import_spo_data(date):
    u"""
    Import the spo KPIs:
    -- pv: total daily PV
    -- pv_influence: search PV coverage
    -- accuracy: result accuracy
    -- se_coverage: demand coverage
    :param date:
    :return:
    """
    index_list = ['pv', 'pv_influence', 'accuracy', 'se_coverage',
                  'data_amount']
    spo_product = get_spo_product(date)
    manual_kpi = get_manual_file(date, spo_product)
    date = time.strftime("%Y-%m-%d", time.strptime(date, "%Y%m%d"))
    data = []
    if spo_product:
        for side in spo_product:
            index_dict = init_index_dict(index_list)
            for category in spo_product[side]:
                srcdict = spo_product[side][category]['srcids']
                pid = spo_product[side][category]['id']
                manual = manual_kpi[side].get(category)
                for val in index_list:
                    temp = {
                        'side': side,
                        'product': category,
                        'date': date,
                        'pid': pid,
                        'index': val,
                    }
                    stats = None
                    if val == 'pv':
                        stats = get_pv(srcdict, side, date)
                    elif val == 'pv_influence':
                        stats = get_pv_influence(srcdict, side, date)
                    elif val == 'accuracy':
                        if manual and manual['accuracy'] != '-':
                            stats = {
                                'value': float(manual['accuracy']),
                                'amount_ratio': manual['amount_ratio'],
                                'last_modify_date': manual['last_modify_date'],
                            }
                    elif val == 'se_coverage':
                        if manual and manual['se_coverage'] != '-':
                            stats = {
                                'value': float(manual['se_coverage']),
                                'amount_ratio': manual['amount_ratio'],
                                'last_modify_date': manual['last_modify_date'],
                            }
                    elif val == 'data_amount':
                        if manual and manual['datamount'] != '-':
                            stats = {
                                'value': float(manual['datamount']),
                                'last_modify_date': manual['last_modify_date'],
                            }
                    if stats is not None and 'value' in stats:
                        temp['value'] = stats['value']
                        temp['last_modify_date'] = stats['last_modify_date']
                        if val in ('pv', 'pv_influence', 'data_amount'):
                            index_dict[val]['value'] += stats['value']
                        elif val in ('accuracy', 'se_coverage'):
                            # weight manual KPIs by their sample share
                            index_dict[val]['value'] += (
                                stats['value'] * stats['amount_ratio'])
                        if (index_dict[val]['last_modify_date'] <
                                stats['last_modify_date']):
                            index_dict[val]['last_modify_date'] = \
                                stats['last_modify_date']
                        data.append(temp)
            # per-side totals
            for idx in index_dict:
                data.append({
                    'side': side,
                    'product': u'总体',
                    'index': idx,
                    'date': date,
                    'pid': 0,
                    'value': index_dict[idx]['value'],
                    'last_modify_date': index_dict[idx]['last_modify_date'],
                })
    stat_db = db.SaveDataBase(date)
    print "remove data of %s..." % date
    stat_db.clear_spo_daily_summary(date, index_list)
    print "insert data into db"
    stat_db.save_spo_index_info(data)
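
# init_index_dict is defined elsewhere; judging from how index_dict is used
# above (accumulating 'value', comparing 'last_modify_date' with <), a
# minimal sketch could look like this. The exact initial values are an
# assumption.
def init_index_dict_sketch(index_list):
    index_dict = {}
    for idx in index_list:
        # 'value' accumulates per-category stats; an empty string for
        # 'last_modify_date' compares less than any real date string
        index_dict[idx] = {'value': 0, 'last_modify_date': ''}
    return index_dict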
def import_spo_data(date, delta=1):
    u"""
    Import spo quality data; currently only overall figures are computed:
    -- module_sla: module stability
    -- fe_sla: front-end interaction stability
    :param date:
    :param delta:
    :return:
    """
    start_time = time.mktime(time.strptime(date, "%Y%m%d"))
    index_dict = {}
    arr = []
    index_dict['module_sla'] = {'pc': '9', 'wise': '33'}
    index_dict['fe_sla'] = {'pc': '27', 'wise': '34'}
    kgqc = {
        'host': 'kgqc.baidu.com',
        'port': '80',
        'url': "/statistics/api/getStatisticsForKgdc/"
    }
    data = {}
    # collect the kgqc module ids of module_sla/fe_sla for both clients
    for idx in index_dict:
        for cli in index_dict[idx]:
            arr.append('%s' % index_dict[idx][cli])
    data['moduleId'] = ','.join(arr)
    client = Request()
    # backfill the given date and the 15 days before it
    for i in range(15, -1, -1):
        search_date = time.strftime(
            '%Y%m%d', time.localtime(start_time - i * 24 * 60 * 60))
        data['start'] = search_date
        data['end'] = search_date
        kgqc_data = client.get(kgqc['host'], kgqc['port'], kgqc['url'], data)
        if 'data' in kgqc_data:
            for idx_key in index_dict:
                stat_db = db.SaveDataBase(search_date)
                print "remove %s data of %s..." % (idx_key, search_date)
                stat_db.clear_spo_daily_summary(search_date, [idx_key])
                for cli_key in index_dict[idx_key]:
                    value = 0
                    print cli_key
                    if index_dict[idx_key][cli_key] in kgqc_data['data']:
                        kgqc_item = kgqc_data['data'][
                            index_dict[idx_key][cli_key]]['data']
                        if kgqc_item['check_num'] > 0:
                            # SLA = share of checks that raised no alarm
                            value = round(
                                float(kgqc_item['check_num'] -
                                      kgqc_item['alarm_num']) /
                                kgqc_item['check_num'], 6)
                    indata = {
                        'side': cli_key,
                        'product': u'总体',
                        'pid': 0,
                        'index': idx_key,
                        'value': value,
                        'date': search_date,
                        'last_modify_date': search_date,
                    }
                    print "insert data into db"
                    stat_db.save_spo_index_info([indata])
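
# Request is an in-house HTTP wrapper defined elsewhere; a minimal Python 2
# sketch of the interface used above (get(host, port, url, params) returning
# parsed JSON), built on httplib/urllib, might look like this. The timeout
# and error handling are assumptions.
import httplib
import json
import urllib

class RequestSketch(object):
    def get(self, host, port, url, params):
        conn = httplib.HTTPConnection(host, int(port), timeout=30)
        try:
            # issue the GET with the query string built from params
            conn.request("GET", "%s?%s" % (url, urllib.urlencode(params)))
            body = conn.getresponse().read()
        finally:
            conn.close()
        return json.loads(body)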