Esempio n. 1
0
if __name__ == "__main__":
    # Preprocess the raw danmu files and save the cleaned text, writing one
    # output file per date into WFDIR.
    files = os.listdir(RFDIR)
    # `info` is applied to every file name; its result must supply the four
    # fields named in `columns` below.
    fdf = pd.DataFrame(
        data=list(map(info, files)),
        columns=['date', 'rank', 'cid', 'fname'],
    ).sort_values(['date', 'rank'])
    # Keep only the entries ranked 15 or better. The explicit copy makes the
    # filtered frame independent, so adding a column below is unambiguous.
    fdf = fdf[fdf['rank'] <= 15].copy()
    # Build the path column from the 'fname' column alone. A row-wise
    # DataFrame.apply(axis=1) returns an empty DataFrame (not a Series) when
    # no rows survive the filter, which makes the column assignment fail;
    # Series.map always yields a Series, so an empty input simply produces
    # no output files.
    fdf['fpath'] = fdf['fname'].map(lambda fname: os.path.join(RFDIR, fname))

    # Cleaned text per date, kept in memory in addition to being written out.
    texts = dict()
    # Rows were sorted by (date, rank) above and groupby keeps the row order
    # inside each group, so files are concatenated in rank order.
    for date, fpaths in tqdm(fdf.groupby('date')['fpath']):
        text = []
        for fpath in fpaths:
            text.extend(Cleaner.preprocess_danmu(fpath))
        write(os.path.join(WFDIR, date + '_danmu.txt'), text)
        texts[date] = text

    # '''
    # Once preprocessing has been run, the saved files can be read back directly
    # '''
    # texts = dict()
    # files = os.listdir(WFDIR)
    # for f in tqdm(files):
    #     fpath = os.path.join(WFDIR, f)
    #     date = f.split('_')[0]
    #     text = read(fpath)
    #     texts[date] = text

    ####################################################