Ejemplo n.º 1
0
    def get_value_and_key(file_path):
        """
        Collect the key -> value pairs stored in an XML file.

        Every ``<string>`` element contributes one entry keyed by its ``name``
        attribute. Every ``<item>`` found under a ``<string-array>`` element
        contributes one entry keyed by ``<array name>-INDEX-<position>``, where
        the position counts the items of that array from 0. Values are whatever
        ``XMLParse.get_text_node_value`` returns for the node.

        :param file_path: path of the XML file to read
        :return: an ``OrderedDict`` holding the ``<string>`` entries first and
                 the array items after them, or ``None`` when ``file_path`` is
                 empty or does not exist
        """
        if not (file_path and os.path.exists(file_path)):
            Log.error("xml 文件不存在")
            return

        document = xml.dom.minidom.parse(file_path)
        result = collections.OrderedDict()

        # Plain <string name="..."> entries.
        for string_node in document.getElementsByTagName('string'):
            name = string_node.getAttribute("name")
            text = XMLParse.get_text_node_value(string_node)
            result[name] = text

        # <string-array name="..."> entries: one synthetic key per <item>.
        for array_node in document.getElementsByTagName("string-array"):
            array_name = array_node.getAttribute('name')
            position = 0
            for item_node in array_node.getElementsByTagName('item'):
                item_key = array_name + "-INDEX-" + str(position)
                item_text = XMLParse.get_text_node_value(item_node)
                result[item_key] = item_text
                position += 1
        return result
Ejemplo n.º 2
0
    def xls2xml(self, xls_path, file_path, target_language, target_dir_path):
        """
        Open an Excel workbook and pass its first sheet to ``convert``.

        The three target arguments are stored on the instance (``filePath``,
        ``targetLanguage``, ``dirPath``) before the sheet is converted.

        :param xls_path: path of the Excel workbook
        :param file_path: path of the target file
        :param target_language: target language
        :param target_dir_path: directory holding the target file(s)
        :return: ``Constant.Error(Constant.ERROR_EXCEL_NOT_EXIST)`` when
                 ``xls_path`` is empty or missing, otherwise the result of
                 ``self.convert``
        """
        Log.info("--- xls2xml ---")

        # Validate the input workbook before touching any instance state.
        workbook_missing = not xls_path or not os.path.exists(xls_path)
        if workbook_missing:
            return Constant.Error(Constant.ERROR_EXCEL_NOT_EXIST)

        self.filePath, self.targetLanguage, self.dirPath = (
            file_path, target_language, target_dir_path)

        # Only the first sheet (index 0) of the workbook is used.
        reader = XLSParse()
        reader.open_excel(xls_path)
        first_sheet = reader.sheet_by_index(0)

        summary = "name = %s, rows number = %s,clos number = %s" % (
            first_sheet.name, first_sheet.nrows, first_sheet.ncols)
        Log.info(summary)
        return self.convert(first_sheet)
Ejemplo n.º 3
0
def addParser():
    """
    Parse the command-line options of the tool.

    Registers ``-i/--input``, ``-f/--targetFilePath``, ``-l/--targetLanguage``
    and ``-d/--targetDirPath``, parses the process arguments, logs the parsed
    options together with the leftover positional arguments, and returns the
    options object.

    :return: the options object produced by ``OptionParser.parse_args``
    """
    option_table = (
        ("-i", "--input", "excel file path"),
        ("-f", "--targetFilePath", "means target output is xml file and input the file path"),
        ("-l", "--targetLanguage", "target language shortname(just for output is file)"),
        ("-d", "--targetDirPath", "means target output is dir contains xml file(s)"),
    )

    parser = OptionParser()
    for short_flag, long_flag, description in option_table:
        parser.add_option(short_flag, long_flag, help=description)

    parsed = parser.parse_args()
    options, args = parsed
    Log.info("options: %s, args: %s" % (options, args))
    return options
Ejemplo n.º 4
0
    def update_xml_value(file_path, keys, values):
        """
        Overwrite the text of matching entries of an XML file, in place.

        ``keys`` and ``values`` are parallel sequences. For every ``<string>``
        element whose ``name`` attribute equals ``keys[i]``, and for every
        ``<item>`` of a ``<string-array>`` whose synthetic key
        ``<array name>-INDEX-<position>`` equals ``keys[i]``, the data of the
        node's first child is replaced by ``values[i]``, provided that value is
        not empty. Each replacement is logged. Nodes without any child are left
        untouched. The document is then serialised as UTF-8 back to
        ``file_path``.

        :param file_path: path of the XML file to update; nothing happens when
                          it does not exist
        :param keys: keys to look for
        :param values: replacement values, ``values[i]`` belonging to ``keys[i]``
        :return: None
        """
        if not os.path.exists(file_path):
            return

        xml_doc = xml.dom.minidom.parse(file_path)

        # Plain <string name="..."> entries.
        for node in xml_doc.getElementsByTagName('string'):
            if node.firstChild is None:
                continue
            xmlKey = node.getAttribute("name")
            # Previous value, only used for the log line below.
            xmlValue = XMLParse.get_text_node_value(node)

            for index, key in enumerate(keys):
                # NOTE(review): len() assumes every value is a sized object
                # (e.g. a string); a numeric value would raise TypeError here.
                if key == xmlKey and len(values[index]) != 0:
                    node.firstChild.data = values[index]
                    Log.info("%s : %s -- >%s " %
                             (xmlKey, xmlValue, node.firstChild.data))

        # <string-array> entries: every <item> is addressed by a synthetic key.
        for array_node in xml_doc.getElementsByTagName('string-array'):
            xmlKey = array_node.getAttribute('name')

            child_nodes = array_node.getElementsByTagName('item')
            for idx, child_node in enumerate(child_nodes):
                # An empty <item/> has no child to overwrite. Skip it, as is
                # done for <string> nodes, instead of failing on None.data.
                # idx still advances so the following items keep their keys.
                if child_node.firstChild is None:
                    continue
                newKey = xmlKey + "-INDEX-" + str(idx)

                # Previous value, only used for the log line below.
                xmlValue = child_node.firstChild.data
                for index, key in enumerate(keys):
                    if key == newKey and len(values[index]) != 0:
                        child_node.firstChild.data = values[index]
                        Log.info(
                            "%s : %s --> %s" %
                            (newKey, xmlValue, child_node.firstChild.data))

        # toxml('utf-8') yields already-encoded bytes, so the file is opened in
        # binary mode; the context manager closes it even if the write fails.
        with open(file_path, 'wb') as writeFile:
            writeFile.write(xml_doc.toxml('utf-8'))
Ejemplo n.º 5
0
    def update_multi_xml_value(sub_dir_path, keys, values, modules):
        """
        Update one XML file per module inside a language sub-directory.

        ``keys``, ``values`` and ``modules`` are parallel sequences: position
        ``i`` supplies ``keys[i]``, ``values[i]`` and ``modules[i]``. Entries
        whose module is ``None`` or ``""`` are ignored. Every run of consecutive
        entries sharing the same module is handed to
        ``XMLParse.update_xml_value`` for the file
        ``sub_dir_path + module + ".xml"``.

        No sorting takes place: a module that shows up in several separate runs
        has its file updated once per run.

        :param sub_dir_path: target sub-directory, e.g. ``value-zh``; it is
                             concatenated directly with the module name, so it
                             has to end with a path separator already
        :param keys: keys of all entries
        :param values: replacement values of all entries
        :param modules: module name of each entry
        :return: None
        """
        Log.info("\n\n" + sub_dir_path + "\n\n")
        if len(modules) == 0:
            return

        # One (module, keys, values) triple per run of consecutive entries that
        # share a module. Building the runs directly, rather than tracking run
        # lengths, means no empty run is ever created, so nothing is indexed or
        # rewritten when the leading (or every) module cell is blank.
        groups = []
        for mid, module in enumerate(modules):
            if module is None or module == "":
                continue
            if not groups or groups[-1][0] != module:
                groups.append((module, [], []))
            _, subKeys, subValues = groups[-1]
            subKeys.append(keys[mid])
            subValues.append(values[mid])

        for module, subKeys, subValues in groups:
            filePath = sub_dir_path + module + ".xml"
            XMLParse.update_xml_value(filePath, subKeys, subValues)
Ejemplo n.º 6
0
import DocUtils
import os
from ExcelUtils import Excel
import shutil
import re
import time
from LogUtils import Log


# Module-wide logger built from LogUtils.Log.
# NOTE(review): the argument is presumably the path of the logger's
# configuration file — confirm against LogUtils.
log = Log("config/config.txt")
# Module-level list, empty at import time; nothing in the visible part of this
# file reads or writes it.
tdr_data_list = []


def test():
    """
    Convert the documents under ``./doc`` and process the matching results.

    Steps, in order:

    1. log the start message;
    2. create the ``excel`` and ``docx`` directories when missing, then pass
       both to ``clear_dir``;
    3. call ``DocUtils.doc_2_docx`` with ``<cwd>/doc`` and ``<cwd>/docx``;
    4. walk ``<cwd>/docx`` and call ``deal_data_2_excel`` on every file whose
       name matches ``.*格式件.*``.

    A file that makes ``deal_data_2_excel`` raise is reported through
    ``log.log_error`` and skipped; the walk continues with the next file.
    """
    log.log_info("开始处理!")

    work_dirs = ("excel", "docx")
    for work_dir in work_dirs:
        if not os.path.exists(work_dir):
            os.mkdir(work_dir)
    for work_dir in work_dirs:
        clear_dir(work_dir)

    DocUtils.doc_2_docx(os.getcwd() + "/doc", os.getcwd() + "/docx")

    wanted_name = re.compile(".*格式件.*")
    for folder, _sub_dirs, file_names in os.walk(os.getcwd() + "/docx"):
        for file_name in file_names:
            if wanted_name.match(file_name) is None:
                continue
            try:
                deal_data_2_excel(folder + "/" + file_name)
            except Exception:
                # Best effort: report the unreadable file and carry on.
                log.log_error("文件:" + folder + "/" + file_name + "格式有误,读取失败,请人工读取!")
Ejemplo n.º 7
0
# NOTE(review): judging by its path this is the mail-sending API endpoint —
# confirm against the code that uses it.
url = 'http://util.online/spider/api/mail'
weburl = 'https://util.online/spider/novel/'
# Template of the mail payload; "to" is empty here.
# NOTE(review): presumably filled in per recipient before sending — confirm.
body = {
    "to": "",
    "subject": "XX小说出新章节咯",
    "text": "新的章节是 http://www.baidu.com",
    "html": "<h1>Welcome</h1><p>That was easy!</p ><a href=' '>新的章节</a >"
}
headers = {'content-type': "application/json"}
# Connection opened at import time and shared by the whole module.
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts positional
# arguments for connect(); the values and their meaning are unchanged
# (host, user, password, database).
# NOTE(review): host and credentials are hard-coded in source; consider moving
# them to configuration or environment variables.
db = pymysql.connect(host="39.104.226.149",
                     user="root",
                     password="root",
                     database="spider",
                     charset='utf8')
keyVa = {}
log = Log()


def main():
    """
    Run one polling pass and schedule the next one 60 seconds later.

    The pass calls ``seloss()``, feeds its result to ``selKeyValue`` and then
    calls ``selNovel()``. The follow-up timer is only started when all three
    calls return without raising.
    """
    # Query all novels.
    novels = seloss()
    # Query the latest chapters stored in the key_value table.
    selKeyValue(novels)
    # Check whether any novel has been updated.
    selNovel()

    next_pass = threading.Timer(60, main)
    next_pass.start()


# 查询所有小说
def seloss():
    ossCur = db.cursor()
Ejemplo n.º 8
0
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Decide where the download starts, based on the arguments passed in.
starturl = "https://www.dingdiann.com"
searchurl = "https://www.dingdiann.com/searchbook.php?keyword="
# Connection opened at import time and shared by the whole module.
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts positional
# arguments for connect(); the values and their meaning are unchanged
# (host, user, password, database).
# NOTE(review): host and credentials are hard-coded in source; consider moving
# them to configuration or environment variables.
db = pymysql.connect(host="39.104.226.149",
                     user="root",
                     password="root",
                     database="spider",
                     charset='utf8')
id_not_in = {}
logger = Log()


# 获取章节内容
def spiderContent(url, id, name):
    try:
        response = urllib2.urlopen(url, timeout=60)
        the_page = response.read()
        soup = BeautifulSoup(the_page, "html.parser")
        bookName = soup.select("div[class='bookname'] > h1")[0].text
        bookContent = soup.select("div[id='content']")[0]
        nextPage = soup.select("div[class='bottem1'] > a")[3]["href"]
        li_plants = bookContent.script
        if li_plants:
            li_plants.clear()
        data = str(bookContent).replace("\\", "").replace(
Ejemplo n.º 9
0
    def convert(self, sheet):
        """
        Apply the translations held in an Excel sheet to the target XML file(s).

        The first row of the sheet is the header. The columns titled
        ``self.keyTitle``, ``self.moduleTitle`` and ``self.targetLanguage`` are
        located in it, then one of two modes runs:

        * single-file mode, when ``self.filePath`` is set and the target
          language column exists: the key column and that language column are
          passed to ``XMLParse.update_xml_value`` for ``self.filePath``;
        * directory mode, otherwise: for every column from ``self.fromIndex``
          onwards, the header title is used as the language, the sub-directory
          is resolved with ``covertTargetPath`` and, when it exists, updated
          through ``XMLParse.update_multi_xml_value``.

        :param sheet: sheet object providing ``row_values`` and ``col_values``
        :return: a ``Constant.Error`` describing the outcome
        """
        Log.info("--- convert ---")
        keyIndex = -1
        moduleIndex = -1
        tempLanguageIndex = None

        # Header row: list with the data of every cell of row 0.
        try:
            firstRow = sheet.row_values(0)
        except Exception as e:
            # str(e) rather than e.message: the attribute is deprecated on
            # Python 2 and does not exist on Python 3.
            return Constant.Error(Constant.EXCEPTION_EXL_FILE, str(e))

        if not firstRow:
            return Constant.Error(Constant.ERROR_KEY_NOT_FOUND)

        for index, title in enumerate(firstRow):
            if title == self.keyTitle:
                keyIndex = index
            elif title == self.moduleTitle:
                moduleIndex = index
            elif title == self.targetLanguage:
                tempLanguageIndex = index

        if keyIndex == -1:
            return Constant.Error(Constant.ERROR_KEY_NOT_FOUND)

        # Key column without its title cell.
        xlsKeys = sheet.col_values(keyIndex)[1:]

        # Compare against None explicitly: column index 0 is a valid position
        # and must not be mistaken for "language column not found".
        if self.filePath and tempLanguageIndex is not None:
            Log.debug("keyIndex = %s moduleIndex = %s languageIndex = %s" % (keyIndex, moduleIndex, tempLanguageIndex))
            # Value column without its title cell.
            xlsValues = sheet.col_values(tempLanguageIndex)[1:]

            XMLParse.update_xml_value(self.filePath, xlsKeys, xlsValues)
            return Constant.Error(Constant.SUCCESS)

        Log.debug("Not file")

        if moduleIndex == -1:
            return Constant.Error(Constant.ERROR_MODULE_NOT_FOUND)

        if not self.dirPath:  # no target directory given
            Log.error("Error:输入不合法")
            return Constant.Error(Constant.ERROR_IMPORT_INPUT)

        if not os.path.exists(self.dirPath):
            Log.error("Error:目标目录不存在 %s" % self.dirPath)
            return Constant.Error(Constant.ERROR_DIR_NOT_EXIST)

        # The key and module columns are the same for every language, so they
        # are read once instead of once per language column.
        xlsModules = sheet.col_values(moduleIndex)[1:]

        for languageIndex, targetLanguage in enumerate(firstRow):
            if languageIndex < self.fromIndex:
                continue

            xlsValues = sheet.col_values(languageIndex)[1:]
            # Sub-directory of the language, for example values-zh:
            # ├── android
            # │   ├── values-zh
            # │   |   ├── strings_device.xml
            # │   |   ├── strings_me.xml
            # │   |   ├── strings_moment.xml
            # │   ├── values-de
            # │   ├── values-ko
            sub_dir_path = covertTargetPath(self.dirPath, targetLanguage)
            # Call form prints the same text on Python 2 and is valid on Python 3.
            print(sub_dir_path)
            if os.path.exists(sub_dir_path):
                XMLParse.update_multi_xml_value(sub_dir_path, xlsKeys, xlsValues, xlsModules)

        return Constant.Error(Constant.SUCCESS)