def breakouts_complements():
    '''
    Supplement the breakout records with per-track summary statistics
    (used for classifying breakouts).

    For every track that has at least one row in `breakouts` with
    release_drive = 0, the following values are computed and upserted into
    the `breakouts_complements` table:

      - average_reviews_num: (track reviews_num - sum of the reviews_num of
        all of the track's breakout rows) / (days between first_review and
        last_review - number of breakout rows).
      - blevel_vec: space-separated count of non-release breakouts per beta
        bin.
      - breakouts_num: number of non-release breakouts that fell into a bin.
      - blevel: mean bin index over those breakouts.

    Tracks for which one of the two divisions would divide by zero are
    skipped with a printed notice instead of aborting the whole run.
    '''
    conn = MyConn()
    # Half-open [low, high) intervals on the `beta` column; the position of an
    # interval in this list is the breakout level.
    beta_bins = [(0, 100), (100, 180), (180, 326), (326, 589),
                 (589, 1066), (1066, 3494), (3494, 30000)]
    blevel_num = len(beta_bins)

    breakout_tracks = [
        r[0] for r in conn.query(targets=["DISTINCT(track_id)"],
                                 table="breakouts",
                                 conditions={"release_drive": 0})
    ]

    for track_id in breakout_tracks:
        # No `table` is passed here, so this relies on MyConn.query's default
        # table. NOTE(review): confirm which table that is.
        reviews_num, first_review, last_review = conn.query(
            targets=["reviews_num", "first_review", "last_review"],
            conditions={"track_id": track_id},
            fetchall=False)
        # Row layout: (flag, reviews_num, beta, release_drive).
        breakouts = conn.query(
            targets=["flag", "reviews_num", "beta", "release_drive"],
            table="breakouts",
            conditions={"track_id": track_id})

        days_num = (last_review - first_review).days
        normal_days = days_num - len(breakouts)
        if normal_days == 0:
            # The average below would divide by zero.
            print("skip {}: no non-breakout days".format(track_id))
            continue
        # Average number of reviews with the breakout points removed.
        breakout_reviews = sum(b[1] for b in breakouts)
        avg_normal = float((reviews_num - breakout_reviews) / normal_days)

        blevel_vec = blevel_num * [0]
        for b in breakouts:
            if b[3] == 1:
                continue  # breakouts driven by the release are not considered
            beta = b[2]
            for level, (low, high) in enumerate(beta_bins):
                if low <= beta < high:  # find the interval containing beta
                    blevel_vec[level] += 1
                    break

        breakouts_num = sum(blevel_vec)
        if breakouts_num == 0:
            # Every non-release breakout had a beta outside all bins, so the
            # mean level is undefined (previously a ZeroDivisionError).
            print("skip {}: no breakout inside the beta bins".format(track_id))
            continue
        weighted_levels = sum(level * count
                              for level, count in enumerate(blevel_vec))
        blevel = float(weighted_levels) / breakouts_num

        settings = {
            "track_id": track_id,
            "average_reviews_num": avg_normal,
            "blevel_vec": ' '.join(map(str, blevel_vec)),
            "breakouts_num": breakouts_num,
            "blevel": blevel
        }
        conn.insert_or_update(table="breakouts_complements", settings=settings)
        print(track_id)
def upload_details():
    '''
    Upload the basic information of each song (song name, artist names,
    album name, ...) to the `details` table.

    Walks the hard-coded `read_path` directory tree, parses every file as a
    JSON document and upserts one row per file. The track id is the file
    name with its last five characters removed (presumably the ".json"
    suffix -- confirm against the data directory).

    Files that cannot be read or parsed are reported on stdout and skipped.
    '''
    def extract_details(filename):
        # Pull the fields of interest out of the first entry of "songs".
        with open(filename, encoding="utf-8") as f:
            content = json.load(f)
        song = content["songs"][0]
        return {
            "name": song["name"],
            "artist": ",".join([item["name"] for item in song["ar"]]),
            "pop": song["pop"],
            "album": song["al"]["name"]
        }

    read_path = "/Volumes/nmusic/NetEase2020/data/simple_proxied_tracks_details"
    conn = MyConn()
    for root, _dirs, files in os.walk(read_path):
        for fname in files:
            if "DS" in fname:
                continue  # skips names containing "DS" (e.g. .DS_Store)
            filepath = os.path.join(root, fname)
            track_id = fname[:-5]
            try:
                details = extract_details(filepath)
            except Exception as e:
                print(filepath)
                print(e)
                # Without this, the insert below would run with `details`
                # unbound (NameError) or left over from the previous file,
                # storing another song's data under this track_id.
                continue
            conn.insert_or_update(table="details", settings={
                "track_id": track_id,
                "name": details["name"],
                "artist": details["artist"],
                "album": details["album"],
                "pop": details["pop"]
            })