def prser_news_content(news, data):
    """Clean a news article body, localise its images and store the article.

    The article payload is looked up in ``data`` by the article's ``docid``.
    When ``sql.is_exists(news, "docid")`` returns 0, the video placeholders
    and the trailing promotional paragraph are removed from the body, every
    image placeholder is replaced by an ``<img>`` tag that points at a local
    copy, the images are downloaded and saved, and the article is saved with
    the rewritten body under ``news["content"]``.

    Args:
        news: Article record providing ``docid``. It is mutated: ``content``
            is set right before the record is saved.
        data: Mapping from docid to the article payload (``body``, ``img``,
            ``docid``). Nothing is done when it is empty or has no entry for
            this article.
    """
    docid = news.get("docid")
    if not data:
        return
    data = data.get(docid)
    if not data:
        # No payload for this article in the response: nothing to store.
        return
    body = data.get("body")

    sql = Sql()
    try:
        if sql.is_exists(news, "docid") == 0:
            # Remove the video placeholder tags.
            body = re.sub(r"\<!--VIDEO#\d+\-\-\>", "", body)
            # Remove the promotional paragraph appended to the article.
            body = re.sub(r"<p>\s+<b>【欢迎搜索关注公众号“足球大会”.*", "", body)

            # Images are spread over 26 sub-directories named "a".."z".
            # NOTE(review): hash() of a str is salted per interpreter run
            # unless PYTHONHASHSEED is fixed, so the chosen letter is not
            # stable across runs - confirm this is acceptable.
            shard = chr(hash(data.get("docid")) % 26 + 97)
            base = cf.get("path", "news")

            # Replace each image placeholder with a tag for the local copy.
            for i, img in enumerate(data.get("img") or []):
                src = img.get("src")
                file_name = docid + f"_{i}." + src.split(".")[-1]
                path = os.path.join(base, shard, file_name)
                img_path = os.path.join("img/news", shard, file_name)
                body = body.replace(img.get("ref"), f'<img src="{img_path}">')
                save_pic(path, get_content_data(src))

            news["content"] = body
            sql.save(news)
    finally:
        # Release the connection even when cleaning or downloading fails.
        sql.close()
def parse_basketball_match(data):
    """Persist the league, both teams and the live score of a basketball match.

    The league is saved if missing, both teams are handed to ``parse_team``,
    and the match row (``match_type`` 1) is written through
    ``sql.reset_if_exist`` keyed on ``info_id``.

    Args:
        data: Match payload with ``leagueMatch``, ``guestTeam``, ``homeTeam``,
            ``basketballLiveScore`` and optionally ``matchTime``.
    """
    league_info = data.get("leagueMatch")
    sql = Sql()
    try:
        league_match = dict(
            table="leaguematch",
            id=league_info.get("leagueId"),
            name=league_info.get("leagueName"),
        )
        sql.save_if_not_exist(league_match)

        guest_team = data.get("guestTeam")
        parse_team(guest_team)
        home_team = data.get("homeTeam")
        parse_team(home_team)

        score = data.get("basketballLiveScore")
        match = dict(
            table="matches",
            guest_score=score.get("guestScore"),
            home_score=score.get("homeScore"),
            match_id=score.get("matchId"),
            info_id=score.get("matchInfoId"),
            match_status=score.get("matchStatus"),
            status=score.get("status"),
            guest_id=guest_team.get("teamId"),
            home_id=home_team.get("teamId"),
            match_type=1,
        )

        match_time = data.get("matchTime")
        if match_time:
            # The raw value is scaled down by 1000 before conversion
            # (presumably epoch milliseconds - confirm against the API).
            # fromtimestamp() without a tz yields naive local time.
            match["match_time"] = datetime.datetime.fromtimestamp(match_time / 1000)

        match["league_id"] = league_info.get("leagueId")
        match["league_name"] = league_info.get("leagueName")
        sql.reset_if_exist(match, "info_id")
    finally:
        # Always release the connection, also when the payload is malformed.
        sql.close()
def get_article_detail(article_id):
    """Fetch the content of an expert article and store it if not yet present.

    The URL template comes from the ``api.expert_article_detail`` config
    entry; its ``@`` placeholder is replaced by the article id.

    Args:
        article_id: Identifier of the article to fetch.
    """
    url = cf.get("api", "expert_article_detail").replace("@", str(article_id))
    payload = get_json_data(url)
    data = payload.get("data") if payload else None
    if data is None:
        # Nothing usable came back; skip instead of failing on None.get().
        logging.warning("no detail returned for article %s", article_id)
        return

    article_detail = dict(
        table="article_details",
        id=article_id,
        content=data.get("content"),
    )

    # Open the connection only once there is something to write.
    sql = Sql()
    try:
        sql.save_if_not_exist(article_detail)
    finally:
        sql.close()
def get_articles_detail(article_id):
    """Fetch an article's detail, store its content and process its matches.

    The URL template comes from the ``api.article_detail_url`` config entry;
    its ``articleid`` placeholder is replaced by the article id. After the
    content is saved (if not already present), the article's ``matchList`` is
    passed to ``parse_match_list``.

    Args:
        article_id: Identifier of the article to fetch.
    """
    print(article_id)
    detail_url = cf.get("api", "article_detail_url").replace("articleid", str(article_id))
    payload = get_json_data(detail_url)
    data = payload.get("data") if payload else None
    if data is None:
        # Nothing usable came back; skip instead of failing on None.get().
        logging.warning("no detail returned for article %s", article_id)
        return

    article_detail = dict(
        table="article_details",
        id=article_id,
        content=data.get("content"),
    )
    sql = Sql()
    try:
        sql.save_if_not_exist(article_detail)
    finally:
        sql.close()

    # An article without matches has nothing further to process.
    match_list = data.get("matchList")
    if match_list:
        parse_match_list(match_list, article_id)
def start_get_hot_expert(t):
    """Refresh the hot-expert data in an endless loop.

    Each round truncates the ``hot_expert`` table, calls ``get_hot_expert``
    with 0 and then with 1, and sleeps before the next round. This function
    never returns.

    Args:
        t: Pause between rounds, in seconds (passed to ``time.sleep``).
    """
    while True:
        logging.info("start get hot expert")
        conn = Sql()
        try:
            conn.execute("truncate table hot_expert")
            conn.db.commit()
        except Exception as err:
            # Best effort: a failed truncate is printed and the refresh still runs.
            print(err)
        conn.close()
        for arg in (0, 1):
            get_hot_expert(arg)
        time.sleep(t)
def parse_match_list(match_list, article_id):
    """Store the matches attached to an article and link them to it.

    For every entry the league is saved if missing, both teams are handed to
    ``parse_team``, the match is saved if no row with the same ``info_id``
    exists, and an ``article_match`` link row is added unless it is already
    there.

    Args:
        match_list: Iterable of match payload dicts; ``None`` or empty is a
            no-op.
        article_id: Identifier of the article the matches belong to.
    """
    if not match_list:
        return

    sql = Sql()
    try:
        for m in match_list:
            match_status = m.get("matchStatus")
            match = dict(
                table="matches",
                category_id=m.get("categoryId"),
                category_name=m.get("categoryName"),
                info_id=m.get("matchInfoId"),
                match_status=match_status,
                # Status 3 is stored as "完", anything else as "未".
                status="完" if match_status == 3 else "未",
            )

            match_time = m.get("matchTimeAc").replace("/", "-")
            if not re.search(r"\d{4}", match_time):
                # No four-digit year in the value: prefix the current year.
                match_time = f"{datetime.date.today().year}-{match_time}"
            match["match_time"] = match_time

            league_id = m.get("leagueId")
            league_name = m.get("leagueName")
            sql.save_if_not_exist(dict(table="leaguematch", id=league_id, name=league_name))
            match["league_id"] = league_id
            match["league_name"] = league_name

            guest_team = m.get("guestTeam")
            parse_team(guest_team)
            home_team = m.get("homeTeam")
            parse_team(home_team)
            match["guest_name"] = guest_team.get("teamName")
            match["home_name"] = home_team.get("teamName")
            match["guest_id"] = guest_team.get("teamId")
            match["home_id"] = home_team.get("teamId")
            match["guest_score"] = m.get("guestScore")
            match["home_score"] = m.get("homeScore")
            sql.save_if_not_exist(match, "info_id")

            article_match = dict(
                table="article_match",
                article_id=article_id,
                info_id=match["info_id"],
            )
            print("article_match", article_match)
            if not sql.is_exists_by_tow(article_match, "article_id", "info_id"):
                sql.save(article_match)
    finally:
        # Release the connection even when one entry is malformed.
        sql.close()
def parse_football_match(data):
    """Persist the league, both teams and the live score of a football match.

    The league is saved if missing, both teams are handed to ``parse_team``,
    and the match row (``match_type`` 0) is written through
    ``sql.reset_if_exist`` keyed on ``info_id``.

    Args:
        data: Match payload with ``leagueMatch``, ``guestTeam``, ``homeTeam``,
            ``footballLiveScore`` and optionally ``matchTime``.
    """
    league_info = data.get("leagueMatch")
    sql = Sql()
    try:
        league_match = dict(
            table="leaguematch",
            id=league_info.get("leagueId"),
            name=league_info.get("leagueName"),
        )
        sql.save_if_not_exist(league_match)

        guest_team = data.get("guestTeam")
        parse_team(guest_team)
        home_team = data.get("homeTeam")
        parse_team(home_team)

        score = data.get("footballLiveScore")
        match = dict(
            table="matches",
            match_id=score.get("matchId"),
            info_id=score.get("matchInfoId"),
            match_status=score.get("matchStatus"),
            status=score.get("status"),
            card_highlight=score.get("cardHighlight"),
            corner_kick=score.get("cornerKick"),
            guest_red_card=score.get("guestRedCard"),
            guest_score=score.get("guestScore"),
            guest_yellow_card=score.get("guestYellowCard"),
            half_score=score.get("halfScore"),
            home_red_card=score.get("homeRedCard"),
            home_score=score.get("homeScore"),
            home_yellow_card=score.get("homeYellowCard"),
            guest_id=guest_team.get("teamId"),
            home_id=home_team.get("teamId"),
            league_id=league_info.get("leagueId"),
            league_name=league_info.get("leagueName"),
            match_type=0,
        )

        match_time = data.get("matchTime")
        if match_time:
            # The raw value is scaled down by 1000 before conversion
            # (presumably epoch milliseconds - confirm against the API).
            # fromtimestamp() without a tz yields naive local time.
            match["match_time"] = datetime.datetime.fromtimestamp(match_time / 1000)

        sql.reset_if_exist(match, "info_id")
    finally:
        # Always release the connection, also when the payload is malformed.
        sql.close()
def parse_team(data):
    """Store a team and, when it was stored now, download its icon.

    The icon file is named ``<teamId>.<extension of the icon URL>`` and is
    written below the directory configured as ``path.team``. The download
    only happens when ``sql.save_if_not_exist`` returns 1 (presumably meaning
    a new row was inserted - confirm against ``Sql``) and the team has an
    icon URL.

    Args:
        data: Team payload with ``teamId``, ``fullName``, ``teamIcon`` and
            ``teamName``.
    """
    team_id = data.get("teamId")
    icon_url = data.get("teamIcon")
    icon_name = ""
    if icon_url:
        icon_type = icon_url.split(".")[-1]
        icon_name = f"{team_id}.{icon_type}"

    team = dict(
        table="team",
        team_type=0,
        id=team_id,
        full_name=data.get("fullName"),
        icon=icon_name,
        name=data.get("teamName"),
    )

    sql = Sql()
    try:
        ret = sql.save_if_not_exist(team)
    finally:
        # Close before the download so the connection is not held during
        # network I/O and is released even if the save fails.
        sql.close()

    if ret == 1 and icon_name:
        content = get_content_data(icon_url)
        icon_path = os.path.join(cf.get("path", "team"), icon_name)
        save_pic(icon_path, content)
def get_expert_league_info(expert_id):
    """Store an expert's per-league statistics and fetch the league articles.

    The URL template comes from the ``api.expert_league_info`` config entry;
    its ``@`` placeholder is replaced by the expert id. For every league in
    the response a row is saved unless one with the same expert and league
    already exists, then ``get_expert_league_articles`` is called for it.

    Args:
        expert_id: Identifier of the expert.
    """
    url = cf.get("api", "expert_league_info").replace("@", str(expert_id))
    data = get_json_data(url)
    if not data:
        return

    # A response without a "data" list is treated as having no leagues.
    for m in data.get("data") or []:
        league_id = m.get("leagueMatchId")
        expert_league = dict(
            table="expert_leaguematches",
            expert_id=expert_id,
            leaguematch_id=league_id,
            leaguematch_name=m.get("leagueMatchName"),
            best_hitrate=m.get("bestMatchesHitRate"),
            hitrate_desc=m.get("totalHitRateDesc"),
        )

        sql = Sql()
        try:
            if not sql.is_exists_by_tow(expert_league, "expert_id", "leaguematch_id"):
                sql.save(expert_league)
                print("save")
        finally:
            # Release the connection before the article fetch below.
            sql.close()

        get_expert_league_articles(expert_id, league_id)
def get_expert_articles(expert_id):
    """Update an expert's profile fields and process the listed plans.

    The URL template comes from the ``api.expert_articles_url`` config entry;
    its ``userid`` placeholder is replaced by the expert id. The expert's
    ``follower`` and ``description`` are written through
    ``sql.update_fields``; every entry of ``outSalePlanList`` is then passed
    to ``parse_expert_articles`` and its detail is fetched.

    Args:
        expert_id: Identifier of the expert.
    """
    url = cf.get("api", "expert_articles_url").replace("userid", str(expert_id))
    try:
        data = get_json_data(url).get("data")
        expert_detail = data.get("expertDetail")
        expert = dict(
            table="expert",
            id=expert_id,
            follower=expert_detail.get("follower"),
            description=expert_detail.get("description"),
        )
        sql = Sql()
        try:
            sql.update_fields(expert)
        finally:
            sql.close()
        # Reuse the payload already fetched instead of requesting the URL again.
        out_sale_data = data.get("outSalePlanList")
    except AttributeError as e:
        # A missing level in the response shows up as None.get(...).
        print(e)
        return

    for plan in out_sale_data or []:
        # NOTE(review): the status returned by parse_expert_articles is
        # ignored and the detail is fetched for every plan; confirm whether
        # already-known articles should be skipped.
        _, article_id = parse_expert_articles(plan, expert_id)
        get_articles_detail(article_id)
def get_expert_league_articles(expert_id, league_id):
    """Store the articles an expert published for one league.

    The URL template comes from the ``api.expert_league_articles`` config
    entry; its ``expertid`` and ``leagueid`` placeholders are replaced by the
    arguments. For every thread in the response, each attached match and its
    league are saved if missing and linked to the article, then the article
    is saved. When ``sql.save_if_not_exist`` returns 0 for the article,
    ``sql.update(article, "league_id")`` is called instead.

    Args:
        expert_id: Identifier of the expert.
        league_id: Identifier of the league the articles are requested for;
            stored as ``league_id`` on every article.
    """
    url = cf.get("api", "expert_league_articles")
    url = url.replace("expertid", str(expert_id)).replace("leagueid", str(league_id))
    payload = get_json_data(url)
    # Tolerate a missing response or a missing "data"/"threadList" level.
    data = ((payload or {}).get("data") or {}).get("threadList")
    print(json.dumps(data, ensure_ascii=False))

    for a in data or []:
        sql = Sql()
        try:
            print(json.dumps(a, ensure_ascii=False))
            article = dict(
                table="articles",
                id=a.get("threadId"),
                title=a.get("threadTitle"),
                expert_id=expert_id,
                lottery_category_id=a.get("lotteryCategoryId"),
                lottery_category_name=a.get("lotteryCategoryName"),
                is_win=a.get("isWin"),
                publish_time=a.get("publishTime"),
                price=a.get("price"),
                league_id=league_id,
            )

            for m in a.get("matchList") or []:
                match_status = m.get("matchStatus")
                match = dict(
                    table="matches",
                    category_id=m.get("categoryId"),
                    category_name=m.get("categoryName"),
                    info_id=m.get("matchInfoId"),
                    match_status=match_status,
                    # Status 3 is stored as "完", anything else as "未".
                    status="完" if match_status == 3 else "未",
                )

                match_time = m.get("matchTime").replace("/", "-")
                if not re.search(r"\d{4}", match_time):
                    # No four-digit year in the value: prefix the current year.
                    match_time = f"{datetime.date.today().year}-{match_time}"
                print(match_time)
                match["match_time"] = match_time

                # Keep the match's own league in separate names so the
                # league_id argument is not overwritten for later articles.
                match_league_id = m.get("leagueId")
                match_league_name = m.get("leagueName")
                sql.save_if_not_exist(
                    dict(table="leaguematch", id=match_league_id, name=match_league_name)
                )
                match["league_id"] = match_league_id
                match["league_name"] = match_league_name
                match["guest_name"] = m.get("guestName")
                match["guest_score"] = m.get("guestScore")
                match["home_name"] = m.get("homeName")
                match["home_score"] = m.get("homeScore")
                sql.save_if_not_exist(match, "info_id")

                article_match = dict(
                    table="article_match",
                    article_id=article["id"],
                    info_id=match["info_id"],
                )
                if not sql.is_exists_by_tow(article_match, "article_id", "info_id"):
                    sql.save(article_match)

            ret = sql.save_if_not_exist(article)
            if ret == 0:
                # The table key is set again before the update (presumably
                # the save call consumes it - confirm against Sql).
                article["table"] = "articles"
                sql.update(article, "league_id")
        finally:
            # Release the connection even when a thread is malformed.
            sql.close()