def parse_expert(info, cla):
    """Store one expert record and fetch the avatar for a newly saved expert.

    :param info: expert payload; only read through ``.get``
    :param cla: 0 marks a football expert, any other value a basketball expert
    """
    expert_id = info.get("userId")  # user id
    expert = dict(table="expert")
    expert["avg_odds"] = info.get("avgOdds")  # average odds
    expert["ball_rate"] = info.get("bAllRate")  # recent prediction results
    expert["earning_rate"] = info.get("earningRate", 0)  # earning rate
    expert["hit_rate"] = info.get("hitRate", 0)
    expert["max_win"] = info.get("maxWin")
    expert["nickname"] = info.get("nickname")  # nickname
    expert["show_hit_rate"] = info.get("showHitRate")  # meaning unknown
    expert["slogan"] = info.get("slogan")  # original note: "profession"
    expert["trend"] = info.get("trend")  # meaning unknown
    expert["id"] = expert_id
    expert["weight"] = info.get("weight")  # original note: "body weight"

    # Column that flags the expert as a football or a basketball expert.
    field = "foot" if cla == 0 else "basket"
    expert[field] = 1

    avatar_url = info.get("avatar")
    if avatar_url:
        # Stored name is "<id>.<extension of the remote file>".
        expert["avatar"] = str(expert_id) + "." + avatar_url.split(".")[-1]
    else:
        # Same fallback parse_team uses for a team without an icon.
        expert["avatar"] = ""

    sql = Sql()
    try:
        ret = sql.save_if_not_exist(expert)
        if ret == 0:
            # The table key is set again before the update; presumably
            # save_if_not_exist consumes it -- confirm against Sql.
            expert["table"] = "expert"
            sql.update(item=expert, field=field)
        elif avatar_url:
            avatar_path = cf.get("path", "avatar")
            content = get_content_data(avatar_url)
            if content:
                # NOTE(review): the file is always written as ".jpg" while the
                # "avatar" column keeps the remote extension -- confirm which
                # of the two names consumers rely on.
                save_pic(avatar_path + str(expert_id) + ".jpg", content)
    finally:
        # Sibling functions close their connection; do it on every path here.
        sql.close()
def parse_hot_expert(data, status, index):
    """Save one ``hot_expert`` row built from ``data``.

    :param data: payload read through ``.get`` (userId, popularity, earningRate)
    :param status: value stored in the ``type`` column
    :param index: value stored in the ``top_index`` column
    """
    expert = dict(table="hot_expert")
    expert["id"] = data.get("userId")
    expert["popularity"] = data.get("popularity")
    expert["earning_rate"] = data.get("earningRate")
    expert["type"] = status
    expert["top_index"] = index

    sql = Sql()
    try:
        sql.save(expert)
    finally:
        # Release the connection, as the other save helpers in this file do.
        sql.close()
def start_articles():
    """Run ``get_expert_league_info`` for every id in the ``expert`` table.

    Returns without doing anything when the query reports a non-zero error.
    """
    print("start")
    sql = Sql()
    try:
        ret, err = sql.execute("select id from expert")
        if err != 0:
            return
        # Materialise the ids before the connection is closed.
        expert_ids = [row[0] for row in ret]
    finally:
        sql.close()

    for expert_id in expert_ids:
        get_expert_league_info(expert_id)
def get_articles(t):
    """Loop forever: fetch articles for every expert, then sleep ``t`` seconds.

    :param t: delay handed to ``time.sleep`` after each full pass
    """
    while True:
        sql = Sql()
        try:
            ret, err = sql.execute("select id from expert")
            # A failed query yields an empty pass rather than an error.
            expert_ids = [row[0] for row in ret] if err == 0 else []
        finally:
            # A connection is opened on every pass, so it must be released on
            # every pass as well.
            sql.close()

        for expert_id in expert_ids:
            get_expert_articles(expert_id)
        time.sleep(t)
def parse_basketball_match(data):
    """Persist the league, both teams and the match row of a basketball game.

    :param data: match payload read through ``.get``; uses the keys
        ``leagueMatch``, ``guestTeam``, ``homeTeam``, ``basketballLiveScore``
        and ``matchTime``
    """
    sql = Sql()

    league_info = data.get("leagueMatch")
    sql.save_if_not_exist(
        {
            "table": "leaguematch",
            "id": league_info.get("leagueId"),
            "name": league_info.get("leagueName"),
        }
    )

    guest_team = data.get("guestTeam")
    parse_team(guest_team)
    home_team = data.get("homeTeam")
    parse_team(home_team)

    live_score = data.get("basketballLiveScore")
    match = {"table": "matches"}
    # (column, payload key) pairs copied straight from the live-score object.
    copied_fields = (
        ("guest_score", "guestScore"),
        ("home_score", "homeScore"),
        ("match_id", "matchId"),
        ("info_id", "matchInfoId"),
        ("match_status", "matchStatus"),
        ("status", "status"),
    )
    for column, key in copied_fields:
        match[column] = live_score.get(key)
    match["guest_id"] = guest_team.get("teamId")
    match["home_id"] = home_team.get("teamId")
    match["match_type"] = 1  # football rows use 0 for this column

    raw_time = data.get("matchTime")
    if raw_time:
        # The value is scaled by 10**-3 before conversion (presumably it is
        # in milliseconds -- confirm against the API).
        seconds = raw_time * pow(10, -3)
        match["match_time"] = datetime.datetime.fromtimestamp(seconds)

    match["league_id"] = league_info.get("leagueId")
    match["league_name"] = league_info.get("leagueName")
    sql.reset_if_exist(match, "info_id")
    sql.close()
def parse_expert_articles(data, expert_id):
    """Save one article summary for ``expert_id`` if it is not stored yet.

    :param data: article payload read through ``.get``
    :param expert_id: id stored in the ``expert_id`` column
    :return: ``(0, article_id)``; the first element is always 0
    """
    # An article without match info is stored with league_id None instead of
    # raising AttributeError on the lookup below.
    match_info = data.get("earliestMatch") or {}
    article_id = data.get("threadId")

    article = dict(table="articles")
    article["id"] = article_id
    article["expert_id"] = expert_id
    article["title"] = data.get("title")
    article["lottery_category_id"] = data.get("lotteryCategoryId")
    article["lottery_category_name"] = data.get("lotteryCategoryName")
    article["is_win"] = data.get("isWin")
    article["price"] = data.get("price")
    article["publish_time"] = data.get("publishTime")
    article["league_id"] = match_info.get("leagueId")

    sql = Sql()
    try:
        sql.save_if_not_exist(article)
    finally:
        sql.close()

    # NOTE(review): the save result is not propagated; the status has always
    # been a constant 0. Kept so existing callers see the same value.
    return 0, article_id
def prser_news_content(news, data):
    """Clean a news body, localise its images and save the news item.

    Nothing is saved when ``data`` is empty, when it has no entry for the
    news ``docid``, or when ``is_exists`` reports a non-zero result.

    :param news: news row; ``docid`` is read and ``content`` is set on it
    :param data: mapping from docid to the detail payload (``body``, ``img``)
    """
    docid = news.get("docid")
    if not data:
        return
    data = data.get(docid)
    if not data:
        # No detail entry for this docid; there is nothing to process.
        return

    body = data.get("body")
    sql = Sql()
    try:
        if sql.is_exists(news, "docid") != 0:
            return

        # Strip video placeholders.
        body = re.sub(r"<!--VIDEO#\d+-->", "", body)
        # Drop the trailing promotional paragraph.
        body = re.sub(r"<p>\s+<b>【欢迎搜索关注公众号“足球大会”.*", "", body)

        # Images are spread over sub-directories "a".."z" chosen from the hash
        # of the payload docid; the value is the same for every image.
        # NOTE(review): str hashes vary between interpreter runs unless
        # PYTHONHASHSEED is fixed, so the bucket is only stable within a run.
        bucket = chr(hash(data.get("docid")) % 26 + 97)
        base = cf.get("path", "news")

        # Replace each image placeholder with a tag pointing at the local copy.
        for i, img in enumerate(data.get("img") or []):
            src = img.get("src")
            file_name = docid + f"_{i}." + src.split(".")[-1]
            path = os.path.join(base, bucket, file_name)
            img_path = os.path.join("img/news", bucket, file_name)
            body = body.replace(img.get("ref"), f'<img src="{img_path}">')
            img_content = get_content_data(src)
            if img_content:
                # Only write the file when the download returned something,
                # matching the avatar handling in parse_expert.
                save_pic(path, img_content)

        news["content"] = body
        sql.save(news)
    finally:
        sql.close()
def parse_team(data):
    """Save a team row and download its icon when the row is newly saved.

    :param data: team payload read through ``.get`` (teamId, fullName,
        teamIcon, teamName)
    """
    sql = Sql()
    team_id = data.get("teamId")
    icon_url = data.get("teamIcon")

    # Icon file name is "<team id>.<remote extension>", or "" without a URL.
    if icon_url:
        extension = icon_url.rsplit(".", 1)[-1]
        icon_name = f"{team_id}.{extension}"
    else:
        icon_name = ""

    team = {
        "table": "team",
        "team_type": 0,
        "id": team_id,
        "full_name": data.get("fullName"),
        "icon": icon_name,
        "name": data.get("teamName"),
    }

    saved = sql.save_if_not_exist(team)
    if icon_name and saved == 1:
        icon_bytes = get_content_data(icon_url)
        target = os.path.join(cf.get("path", "team"), icon_name)
        save_pic(target, icon_bytes)
    sql.close()
def get_article_detail(article_id):
    """Fetch the detail of ``article_id`` and save its content if missing.

    Returns without saving when the request yields no usable ``data``.

    :param article_id: substituted for ``@`` in the configured URL template
    """
    url = cf.get("api", "expert_article_detail").replace("@", str(article_id))

    # get_json_data can come back empty (get_expert_league_info guards for it),
    # so skip this article rather than aborting the caller's loop.
    response = get_json_data(url)
    data = response.get("data") if response else None
    if not data:
        return

    article_detail = dict(table="article_details")
    article_detail["id"] = article_id
    article_detail["content"] = data.get("content")

    # Open the connection only once there is something to store.
    sql = Sql()
    try:
        sql.save_if_not_exist(article_detail)
    finally:
        sql.close()
def parse_football_match(data):
    """Persist the league, both teams and the match row of a football game.

    :param data: match payload read through ``.get``; uses the keys
        ``leagueMatch``, ``guestTeam``, ``homeTeam``, ``footballLiveScore``
        and ``matchTime``
    """
    sql = Sql()

    league_info = data.get("leagueMatch")
    sql.save_if_not_exist(
        {
            "table": "leaguematch",
            "id": league_info.get("leagueId"),
            "name": league_info.get("leagueName"),
        }
    )

    guest_team = data.get("guestTeam")
    parse_team(guest_team)
    home_team = data.get("homeTeam")
    parse_team(home_team)

    live_score = data.get("footballLiveScore")
    match = {"table": "matches"}
    # (column, payload key) pairs copied straight from the live-score object.
    copied_fields = (
        ("match_id", "matchId"),
        ("info_id", "matchInfoId"),
        ("match_status", "matchStatus"),
        ("status", "status"),
        ("card_highlight", "cardHighlight"),
        ("corner_kick", "cornerKick"),
        ("guest_red_card", "guestRedCard"),
        ("guest_score", "guestScore"),
        ("guest_yellow_card", "guestYellowCard"),
        ("half_score", "halfScore"),
        ("home_red_card", "homeRedCard"),
        ("home_score", "homeScore"),
        ("home_yellow_card", "homeYellowCard"),
    )
    for column, key in copied_fields:
        match[column] = live_score.get(key)

    match["guest_id"] = guest_team.get("teamId")
    match["home_id"] = home_team.get("teamId")
    match["league_id"] = league_info.get("leagueId")
    match["league_name"] = league_info.get("leagueName")
    match["match_type"] = 0  # basketball rows use 1 for this column

    raw_time = data.get("matchTime")
    if raw_time:
        # The value is scaled by 10**-3 before conversion (presumably it is
        # in milliseconds -- confirm against the API).
        seconds = raw_time * pow(10, -3)
        match["match_time"] = datetime.datetime.fromtimestamp(seconds)

    sql.reset_if_exist(match, "info_id")
    sql.close()
def get_articles_detail(article_id):
    """Fetch an article's detail, save its content and parse its match list.

    Returns without saving when the request yields no usable ``data``.

    :param article_id: substituted for ``articleid`` in the configured URL
    """
    print(article_id)
    detail_url = cf.get("api", "article_detail_url")
    detail_url = detail_url.replace("articleid", str(article_id))

    # get_json_data can come back empty (get_expert_league_info guards for it),
    # so skip this article rather than aborting the caller's loop.
    response = get_json_data(detail_url)
    data = response.get("data") if response else None
    if not data:
        return

    article_detail = dict(table="article_details")
    article_detail["id"] = article_id
    article_detail["content"] = data.get("content")

    sql = Sql()
    try:
        sql.save_if_not_exist(article_detail)
    finally:
        sql.close()

    # A detail without matches is handed over as an empty list.
    parse_match_list(data.get("matchList") or [], article_id)
def start_get_hot_expert(t):
    """Loop forever: empty ``hot_expert``, refetch both lists, sleep ``t``.

    :param t: delay handed to ``time.sleep`` after each pass
    """
    while True:
        logging.info("start get hot expert")

        sql = Sql()
        try:
            sql.execute("truncate table hot_expert")
            sql.db.commit()
        except Exception as exc:
            # Best effort: report the failure and carry on with the refresh.
            print(exc)
        sql.close()

        # get_hot_expert is called once with 0 and once with 1.
        for kind in (0, 1):
            get_hot_expert(kind)

        time.sleep(t)
def get_expert_league_info(expert_id):
    """Store an expert's per-league stats and fetch articles for each league.

    Returns without doing anything when the request yields nothing.

    :param expert_id: substituted for ``@`` in the configured URL template
    """
    url = cf.get("api", "expert_league_info").replace("@", str(expert_id))
    response = get_json_data(url)
    if not response:
        return

    # A response without a "data" list is treated as having no leagues.
    for m in response.get("data") or []:
        league_id = m.get("leagueMatchId")
        expert_league = dict(table="expert_leaguematches")
        expert_league["expert_id"] = expert_id
        expert_league["leaguematch_id"] = league_id
        expert_league["leaguematch_name"] = m.get("leagueMatchName")
        expert_league["best_hitrate"] = m.get("bestMatchesHitRate")
        expert_league["hitrate_desc"] = m.get("totalHitRateDesc")

        sql = Sql()
        try:
            if not sql.is_exists_by_tow(expert_league, "expert_id", "leaguematch_id"):
                sql.save(expert_league)
                print("save")
        finally:
            # One connection per league; release it before the nested fetch.
            sql.close()

        get_expert_league_articles(expert_id, league_id)
def get_expert_articles(expert_id):
    """Refresh an expert's profile fields and process the listed articles.

    Prints the error and returns when the response lacks the expected objects.

    :param expert_id: substituted for ``userid`` in the configured URL
    """
    url = cf.get("api", "expert_articles_url").replace("userid", str(expert_id))
    try:
        data = get_json_data(url).get("data")
        expert_detail = data.get("expertDetail")
        expert = dict(
            table="expert",
            id=expert_id,
            follower=expert_detail.get("follower"),
            description=expert_detail.get("description"),
        )
        sql = Sql()
        try:
            sql.update_fields(expert)
        finally:
            sql.close()
        # Reuse the payload already fetched instead of requesting the same URL
        # a second time.
        out_sale_data = data.get("outSalePlanList")
    except AttributeError as e:
        # A missing response/"data"/"expertDetail" surfaces as None.get(...).
        print(e)
        return

    for plan in out_sale_data or []:
        _, article_id = parse_expert_articles(plan, expert_id)
        # The detail has always been fetched for every article: the status
        # from parse_expert_articles was overwritten before it was checked.
        get_articles_detail(article_id)
def get_expert_league_articles(expert_id, league_id):
    """Store the articles an expert wrote for one league, with their matches.

    For each thread: save every listed match and its league, link the match to
    the article, then save the article or update its ``league_id``.

    :param expert_id: substituted for ``expertid`` in the configured URL
    :param league_id: substituted for ``leagueid`` in the URL and stored as
        the ``league_id`` of every article returned
    """
    url = cf.get("api", "expert_league_articles")
    url = url.replace("expertid", str(expert_id)).replace("leagueid", str(league_id))

    # Tolerate an empty response or missing keys instead of raising.
    response = get_json_data(url)
    payload = response.get("data") if response else None
    threads = payload.get("threadList") if payload else None
    print(json.dumps(threads, ensure_ascii=False))

    for a in threads or []:
        print(json.dumps(a, ensure_ascii=False))
        article_id = a.get("threadId")
        article = dict(table="articles")
        article["id"] = article_id
        article["title"] = a.get("threadTitle")
        article["expert_id"] = expert_id
        article["lottery_category_id"] = a.get("lotteryCategoryId")
        article["lottery_category_name"] = a.get("lotteryCategoryName")
        article["is_win"] = a.get("isWin")
        article["publish_time"] = a.get("publishTime")
        article["price"] = a.get("price")
        # Always the league that was queried. The match loop below keeps its
        # league id in a separate name so it cannot leak into the next article.
        article["league_id"] = league_id

        sql = Sql()
        try:
            for m in a.get("matchList") or []:
                info_id = m.get("matchInfoId")
                match = dict(table="matches")
                match["category_id"] = m.get("categoryId")
                match["category_name"] = m.get("categoryName")
                match["info_id"] = info_id
                match["match_status"] = m.get("matchStatus")
                match["status"] = "完" if match["match_status"] == 3 else "未"

                match_time = m.get("matchTime").replace("/", "-")
                if not re.search(r"\d{4}", match_time):
                    # No four-digit year present: prefix the current year.
                    match_time = f"{datetime.date.today().year}-{match_time}"
                print(match_time)
                match["match_time"] = match_time

                match_league_id = m.get("leagueId")
                match_league_name = m.get("leagueName")
                league = dict(table="leaguematch")
                league["id"] = match_league_id
                league["name"] = match_league_name
                sql.save_if_not_exist(league)

                match["league_id"] = match_league_id
                match["league_name"] = match_league_name
                match["guest_name"] = m.get("guestName")
                match["guest_score"] = m.get("guestScore")
                match["home_name"] = m.get("homeName")
                match["home_score"] = m.get("homeScore")
                sql.save_if_not_exist(match, "info_id")

                article_match = dict(table="article_match")
                article_match["article_id"] = article_id
                article_match["info_id"] = info_id
                if not sql.is_exists_by_tow(article_match, "article_id", "info_id"):
                    sql.save(article_match)

            ret = sql.save_if_not_exist(article)
            if ret == 0:
                # The table key is set again before the update; presumably
                # save_if_not_exist consumes it -- confirm against Sql.
                article["table"] = "articles"
                sql.update(article, "league_id")
        finally:
            sql.close()
def start_article_details():
    """Run ``get_article_detail`` for every id in the ``articles`` table.

    Returns without doing anything when the query reports a non-zero error,
    the same check ``start_articles`` applies.
    """
    sql = Sql()
    try:
        ret, err = sql.execute("select id from articles")
        if err != 0:
            return
        # Materialise the ids before the connection is closed.
        article_ids = [row[0] for row in ret]
    finally:
        sql.close()

    for article_id in article_ids:
        get_article_detail(article_id)
def parse_match_list(match_list, article_id):
    """Save each match of an article with its league, teams and article link.

    :param match_list: iterable of match payloads read through ``.get``;
        ``None`` or an empty list is a no-op
    :param article_id: id stored in the ``article_match`` link rows
    """
    if not match_list:
        return

    sql = Sql()
    try:
        for m in match_list:
            info_id = m.get("matchInfoId")
            match = dict(table="matches")
            match["category_id"] = m.get("categoryId")
            match["category_name"] = m.get("categoryName")
            match["info_id"] = info_id
            match["match_status"] = m.get("matchStatus")
            match["status"] = "完" if match["match_status"] == 3 else "未"

            match_time = m.get("matchTimeAc").replace("/", "-")
            if not re.search(r"\d{4}", match_time):
                # No four-digit year present: prefix the current year.
                match_time = str(datetime.date.today().year) + "-" + match_time
            match["match_time"] = match_time

            league_id = m.get("leagueId")
            league_name = m.get("leagueName")
            league = dict(table="leaguematch")
            league["id"] = league_id
            league["name"] = league_name
            sql.save_if_not_exist(league)
            match["league_id"] = league_id
            match["league_name"] = league_name

            # Teams are saved through parse_team, which opens its own Sql.
            guest_team = m.get("guestTeam")
            parse_team(guest_team)
            home_team = m.get("homeTeam")
            parse_team(home_team)
            match["guest_name"] = guest_team.get("teamName")
            match["home_name"] = home_team.get("teamName")
            match["guest_id"] = guest_team.get("teamId")
            match["home_id"] = home_team.get("teamId")
            match["guest_score"] = m.get("guestScore")
            match["home_score"] = m.get("homeScore")
            sql.save_if_not_exist(match, "info_id")

            article_match = dict(table="article_match")
            article_match["article_id"] = article_id
            article_match["info_id"] = info_id
            print("article_match", article_match)
            if not sql.is_exists_by_tow(article_match, "article_id", "info_id"):
                sql.save(article_match)
    finally:
        sql.close()