Exemple #1
0
                    item1.get('comment_content'),
                    item1.get('comment_attitudes_count'),
                    item1.get('comment_time'),
                    item1.get('comment_source')
                ]

                comment.append(temp1)

            temp.append(comment)

            result.append(temp)
    print('[{}]--data process finally'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    return result


if __name__ == '__main__':
    # Destination directory for the generated .docx files.
    # NOTE(review): the original comment said "clear the directory";
    # doc.path_exists is a project helper not shown here -- confirm that it
    # really empties/creates the folder.
    docx_dir = "/Users/red/Desktop/temp/news/data/sina/docx"
    doc.path_exists(docx_dir)

    # One document per record; the first element of each record is used as
    # the file name stem.
    records = sina_record()
    for record in records:
        docx_file = os.path.join(docx_dir, record[0] + ".docx")
        doc.save_sina_docx(record, docx_file)

    # Timestamped progress messages (each one reads the clock on its own).
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--generation doc finally'.format(stamp))
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--final'.format(stamp))
            #  只有文章
            article.append(temp)

            temp.append(temp_filter)
            # 文章加评论
            article_comment.append(temp)
            # excel索引
            excel.append(excel_temp)
    print('[{}]--data process finally'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    return article_comment, article, excel


if __name__ == '__main__':
    # get_record() hands back three lists that are indexed in parallel below.
    with_comments, bodies, index_rows = get_record()

    # Directory for the article-only text files.
    article_path = "/Users/red/Desktop/temp/news/data/sj_data/sina_data/article_txt"
    doc.path_exists(article_path)
    # Directory for the article-plus-comments text files.
    article_comment_path = "/Users/red/Desktop/temp/news/data/sj_data/sina_data/article_comment_txt"
    doc.path_exists(article_comment_path)

    for idx, row in enumerate(index_rows):
        # The first column of the index row doubles as the file name stem.
        txt_name = row[0] + '.txt'

        # Article text on its own.
        file_util.write_file(os.path.join(article_path, txt_name), bodies[idx][0])

        # Article text, a blank line, then the comment part.
        entry = with_comments[idx]
        combined = '\n\n'.join([str(entry[0]), str(entry[1])])
        file_util.write_file(os.path.join(article_comment_path, txt_name), combined)

    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--file write finally'.format(stamp))

    # Spreadsheet header row: file number, url, time, topic.
    title = ['文件编号', 'url', '时间', '话题']
    index_file = '/Users/red/Desktop/temp/news/data/sj_data/sina_data/index.xls'
    # NOTE(review): the leading 4 presumably is the column count -- confirm
    # against xlwt_util.save_xlwt.
    xlwt_util.save_xlwt(4, 'sheet1', title, index_rows, index_file)

    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--excel write finally'.format(stamp))
            ]
            temp = [filters.stripTagSimple(item.get('article_content'))]
            #  只有文章
            article.append(temp)

            # excel索引
            excel.append(excel_temp)
    print('[{}]--data process finally'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    return article, excel


if __name__ == '__main__':
    # get_record() hands back the article list and the matching index rows;
    # the two are indexed in parallel below.
    bodies, index_rows = get_record()

    # Directory for the article text files.
    article_path = "/Users/red/Desktop/temp/news/data/sj_data/sohu_data/article_txt"
    doc.path_exists(article_path)

    for idx, row in enumerate(index_rows):
        # The first column of the index row doubles as the file name stem.
        txt_file = os.path.join(article_path, row[0] + '.txt')
        file_util.write_file(txt_file, bodies[idx][0])

    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--file write finally'.format(stamp))

    # Spreadsheet header row: file number, url, time, category.
    title = ['文件编号', 'url', '时间', '分类']
    index_file = '/Users/red/Desktop/temp/news/data/sj_data/sohu_data/index.xls'
    # NOTE(review): the leading 4 presumably is the column count -- confirm
    # against xlwt_util.save_xlwt.
    xlwt_util.save_xlwt(4, 'sheet1', title, index_rows, index_file)

    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print('[{}]--excel write finally'.format(stamp))