Python Csv_base.read_csv_file Examples

Programming Language: Python

Namespace/Package Name: common.inc_csv

Class/Type: Csv_base

Method/Function: read_csv_file

Examples at hotexamples.com: 2

Python Csv_base.read_csv_file - 2 examples found. These are the top rated real world Python examples of common.inc_csv.Csv_base.read_csv_file extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Csv_base(17)

write_csv_file_line(14)

read_csv_file(2)

read_csv_file_dict(2)

write_csv_file_dictLines(2)

Example #1

Show file

#!/usr/bin/env python
# coding=utf-8

from common.inc_csv import Csv_base
from lxml import html
import re


def readFile(filePath=''):
    pass


if __name__ == '__main__':
    filepath = '../data/问答语料_1.0.txt'
    file = Csv_base()
    list = file.read_csv_file(filepath)
    for i in range(len(list)):
        if (i > 3):
            row = list[i]
            rows = str(row[0]).split("\t")
            html_text = rows[1].replace("[", '').replace("]", "")
            # 正则匹配 re.match从字符串起始处匹配。
            html_text = re.sub(re.compile(r"<script.*?</script>", re.S), "",
                               html_text)

            print(html_text)
            tree = html.fromstring(html_text)
            texts = tree.xpath('.//text()')
            text = ""
            for a in texts:
                text = text + str(a).replace("\\n", ".").strip()

Example #2

Show file

    print(a)
    a = re.sub(re.compile(r"收藏查看我的收藏(\d+)有用(.*?)(\d+)已投票(\d+)", re.S), "", a)
    a = str(a).replace("编辑锁定", " ").strip()
    a = str(a).replace("讨论999", " ").strip()
    a = str(a).replace("本词条缺少概述图，补充相关内容使词条更完整，还能快速升级，赶紧来编辑吧！", " ").strip()
    a = str(a).replace(
        "百度百科内容由网友共同编辑，如您发现自己的词条内容不准确或不完善，欢迎使用本人词条编辑服务（免费）参与修正。", " ").strip()
    a = str(a).replace("立即前往 >>", " ").strip()
    print(a)
    return a


if __name__ == '__main__':

    csv_data_path = "../../data/百科候选关键词.csv"
    rows = csv.read_csv_file(csv_data_path)
    for row in rows:
        try:
            html_data_path = str(row[11]).replace("`", "")
            #print("../"+html_data_path)
            html_context = file.open_source2(file_path="../" + html_data_path +
                                             ".html")

            # 正则匹配 re.match从字符串起始处匹配。
            html_text = re.sub(re.compile(r"<script.*?</script>", re.S), "",
                               html_context)
            tree = html.fromstring(html_text)
            texts = tree.xpath('.//div[@class="main-content"]//text()')
            text = ""
            for a in texts:
                text = text + str(a).replace("\\n", " ").strip()