item1.get('comment_content'), item1.get('comment_attitudes_count'), item1.get('comment_time'), item1.get('comment_source') ] comment.append(temp1) temp.append(comment) result.append(temp) print('[{}]--data process finally'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return result if __name__ == '__main__': # 清空目录 doc.path_exists("/Users/red/Desktop/temp/news/data/sina/docx") result = sina_record() for temp in result: doc.save_sina_docx( temp, os.path.join("/Users/red/Desktop/temp/news/data/sina/docx", temp[0] + ".docx")) print('[{}]--generation doc finally'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) print('[{}]--final'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
# 只有文章 article.append(temp) temp.append(temp_filter) # 文章加评论 article_comment.append(temp) # excel索引 excel.append(excel_temp) print('[{}]--data process finally'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return article_comment, article, excel if __name__ == '__main__': article_comment, article, excel = get_record() # article存储路径 article_path = "/Users/red/Desktop/temp/news/data/sj_data/sina_data/article_txt" doc.path_exists(article_path) # article_comment存储路径 article_comment_path = "/Users/red/Desktop/temp/news/data/sj_data/sina_data/article_comment_txt" doc.path_exists(article_comment_path) for i in range(len(excel)): file_util.write_file(os.path.join(article_path, excel[i][0] + '.txt'), article[i][0]) file_util.write_file(os.path.join(article_comment_path, excel[i][0] + '.txt'), str(article_comment[i][0]) + '\n\n' + str(article_comment[i][1])) print('[{}]--file write finally'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) title = ['文件编号', 'url', '时间', '话题'] xlwt_util.save_xlwt(4, 'sheet1', title, excel, '/Users/red/Desktop/temp/news/data/sj_data/sina_data/index.xls') print('[{}]--excel write finally'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
] temp = [filters.stripTagSimple(item.get('article_content'))] # 只有文章 article.append(temp) # excel索引 excel.append(excel_temp) print('[{}]--data process finally'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return article, excel if __name__ == '__main__': article, excel = get_record() # article存储路径 article_path = "/Users/red/Desktop/temp/news/data/sj_data/sohu_data/article_txt" doc.path_exists(article_path) for i in range(len(excel)): file_util.write_file(os.path.join(article_path, excel[i][0] + '.txt'), article[i][0]) print('[{}]--file write finally'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) title = ['文件编号', 'url', '时间', '分类'] xlwt_util.save_xlwt( 4, 'sheet1', title, excel, '/Users/red/Desktop/temp/news/data/sj_data/sohu_data/index.xls') print('[{}]--excel write finally'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))