if __name__ == "__main__":
    # Preprocess the raw danmu files and save one cleaned text file per date.
    #
    # Each entry of RFDIR is passed through `info`; the resulting records are
    # loaded into a frame with the columns listed below.
    # NOTE(review): presumably `info` parses a file name into
    # (date, rank, cid, fname) -- confirm against its definition.
    files = os.listdir(RFDIR)
    fdf = pd.DataFrame(
        data=[info(fname) for fname in files],
        columns=['date', 'rank', 'cid', 'fname'],
    ).sort_values(['date', 'rank'])

    # Keep only rows with rank <= 15. The explicit copy makes the filtered
    # frame independent of the original, so adding a column below does not
    # trigger pandas' SettingWithCopyWarning.
    fdf = fdf[fdf['rank'] <= 15].copy()

    # Build the full path column-wise from 'fname'. Unlike a row-wise
    # `apply(..., axis=1)`, mapping a single column always yields a Series,
    # including when no rows survive the filter.
    fdf['fpath'] = fdf['fname'].map(lambda fname: os.path.join(RFDIR, fname))

    # For every date: concatenate the preprocessed output of all of that
    # date's files, write it to WFDIR/<date>_danmu.txt and keep it in memory.
    texts = dict()
    for date, frame in tqdm(fdf.groupby('date')['fpath']):
        text = []
        for fpath in frame:
            text.extend(Cleaner.preprocess_danmu(fpath))
        write(os.path.join(WFDIR, date + '_danmu.txt'), text)
        texts[date] = text

    # Alternative: once preprocessing has been run, the saved texts can be
    # read back directly instead of being recomputed.
    #
    # texts = dict()
    # files = os.listdir(WFDIR)
    # for f in tqdm(files):
    #     fpath = os.path.join(WFDIR, f)
    #     date = f.split('_')[0]
    #     text = read(fpath)
    #     texts[date] = text

    ####################################################