def code_pro_dis(src_path=r'E:\标签化测试.xlsx', des_path='demo.xls'):
    """Translate the province/district names of a workbook into codes.

    Every row of sheet ``Sheet`` in the source workbook is read. The values
    in columns 7-10 (source province, source district, current province and
    current district, going by the variable names) have their spaces removed
    and are looked up with ``get_code``. The four resulting codes are written
    to columns 0-3 of the same row of a new sheet named ``Worksheet``.

    Args:
        src_path: Path of the source workbook. Defaults to the location that
            used to be hard-coded.
        des_path: Path of the ``.xls`` file to write. Defaults to
            ``demo.xls``.
    """
    # Source workbook
    src_book = xlrd.open_workbook(src_path)
    ws_src = src_book.sheet_by_name('Sheet')
    # Destination workbook and worksheet
    des_book = xlwt.Workbook(encoding='utf-8')
    ws_des = des_book.add_sheet('Worksheet')
    # Log file
    log = MyLog.MyLog("test")
    logger = log.init_logger()

    for i in range(ws_src.nrows):
        try:
            # One row of the source sheet
            row_values = ws_src.row_values(i)
            # Address names with the spaces stripped
            src_province_name = row_values[7].replace(' ', '')
            src_distinct_name = row_values[8].replace(' ', '')
            now_province_name = row_values[9].replace(' ', '')
            now_distinct_name = row_values[10].replace(' ', '')

            ws_des.write(i, 0, get_code(province, src_province_name))
            ws_des.write(i, 1, get_code(city, src_distinct_name))
            ws_des.write(i, 2, get_code(province, now_province_name))
            ws_des.write(i, 3, get_code(city, now_distinct_name))

            # Saved after every row, as before, so the rows already written
            # are on disk even if a later row raises.
            des_book.save(des_path)
        except IOError:
            # The row index is an int: convert it before concatenating,
            # otherwise this handler raises TypeError instead of logging.
            logger.error(str(i) + "行出错")
        logger.info(i)
 def __init__(self):
     """Constructor: initialise the attributes.

     Creates the URL manager, the log, the downloader, the parser and the
     outputer used by this object.
     """
     self.urls = UrlManager()
     self.log = MyLog("spider_main", "logs")
     self.downloader = HtmlDownloader()
     self.parser = HtmlParser()
     self.outputer = HtmlOutputer()
Beispiel #3
0
    def __init__(self, parent=None):
        """Set up the window, either attached to ``parent`` or standalone.

        With a parent, the log (``l``) and the options object are borrowed
        from it and ``self.test`` is False. Without one, a log on
        "activity.log" and a settings object on "debug/options.ini" are
        created and ``self.test`` is True.
        """
        super(WizardDelJournal, self).__init__(parent)

        self.setModal(True)
        self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

        self.parent = parent
        self.resource_dir, self.DATA_PATH = functions.getRightDirs()

        standalone = parent is None
        if not standalone:
            self.l = self.parent.l
            self.options = self.parent.options
        else:
            self.l = MyLog("activity.log")
            # Dummy settings file, only used when running without a parent
            self.options = QtCore.QSettings(
                "debug/options.ini", QtCore.QSettings.IniFormat)
        self.test = standalone

        # Checkboxes of the window are collected here
        self.check_journals = []

        self.initUI()
        self.defineSlots()
Beispiel #4
0
    def __init__(self, parent=None):
        """Set up the search window, attached to ``parent`` or standalone.

        The logger comes from ``parent.l`` when a parent is given; otherwise
        a log on "activity.log" is created and ``self.test`` is True. The
        saved searches are kept in "/config/searches.ini" under the data
        path returned by ``functions.getRightDirs()``.
        """
        super(AdvancedSearch, self).__init__(parent)
        self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

        self.parent = parent

        self.resource_dir, data_path = functions.getRightDirs()

        # No parent means the module was started on its own: use a
        # dedicated logger in that case
        standalone = parent is None
        self.logger = MyLog("activity.log") if standalone else self.parent.l
        self.test = standalone

        settings_file = data_path + "/config/searches.ini"
        self.options = QtCore.QSettings(settings_file,
                                        QtCore.QSettings.IniFormat)

        # Holds the lineEdit widgets carrying the values of the search fields
        self.fields_list = []

        self.initUI()
        self.defineSlots()
        self.restoreSettings()
Beispiel #5
0
 def __init__(self):
     """Constructor: create the log and start the CSV output file.

     The output file is opened for writing and receives a single header
     row containing the column titles listed below.
     """
     self.log = MyLog("html_outputer", "logs")

     # Column titles, in output order
     header = [
         "id", "小区名称", "所在区域", "总价", "单价",
         "房屋户型", "所在楼层", "建筑面积", "户型结构", "套内面积",
         "建筑类型", "房屋朝向", "建筑结构", "装修情况", "梯户比例",
         "配备电梯", "产权年限", "挂牌时间", "交易权属", "上次交易",
         "房屋用途", "房屋年限", "产权所属", "抵押信息", "房本备件",
     ]

     filename = "output\\ershoufang.csv"
     with open(filename, "w", newline="") as csv_file:
         csv.writer(csv_file, dialect='excel').writerow(header)
Beispiel #6
0
    def __init__(self, parent=None):
        """Set up the signing window, attached to ``parent`` or standalone.

        The logger is ``parent.l`` when a parent is given; otherwise a log
        on "activity.log" is created and ``self.test`` is True. After the
        UI and the slots are set up, ``getCaptcha()`` is called.
        """
        super(Signing, self).__init__(parent)
        self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

        self.parent = parent
        self.resource_dir, self.DATA_PATH = functions.getRightDirs()

        has_parent = parent is not None
        if has_parent:
            self.logger = self.parent.l
        else:
            self.logger = MyLog("activity.log")
        self.test = not has_parent

        # Becomes meaningful once a login attempt has been checked
        self.validated = False

        self.setModal(True)

        self.initUI()
        self.defineSlots()
        self.getCaptcha()
def _parse_cell(row, column, label, parser, logger):
    """Read one cell of ``ws_src`` and parse it with ``parser``.

    Returns the parsed value. When ``parser`` returns None the raw cell
    value is passed to ``write_to_file``, an error naming the row and
    ``label`` is logged, and None is returned.
    """
    raw = ws_src.cell(row=row, column=column).value
    parsed = parser(raw)
    if parsed is None:
        write_to_file(raw)
        # format() copes with the int row number and with a None cell value;
        # plain string concatenation raised TypeError here.
        logger.error('{}===={}:{}'.format(row, label, raw))
    return parsed


def scanner_file():
    """Parse each data row of ``ws_src`` and append the result to ``ws_des``.

    Rows are read from row 2 up to ``ws_src.max_row``. A row is skipped, with
    an error logged, when its case id (column 2) is empty or when any of the
    birth info (column 5), lost info (column 7), current address (column 8)
    or previous address (column 9) cannot be parsed. Otherwise the parsed
    birth values, lost values, previous address and current address are
    joined, in that order, into one output row.
    """
    log = MyLog.MyLog("test")
    logger = log.init_logger()

    for i in range(2, ws_src.max_row + 1):
        # Case id
        case_id = ws_src.cell(row=i, column=2).value
        if case_id is None:
            logger.error('{}====case_id:{}'.format(i, case_id))
            continue

        # Birth information
        birth_day = _parse_cell(i, 5, 'birthInfo', split_double_time, logger)
        if birth_day is None:
            continue
        # Information about the disappearance
        lost_day = _parse_cell(i, 7, 'lostInfo', split_double_time, logger)
        if lost_day is None:
            continue
        # Current address
        now_addr = _parse_cell(i, 8, 'now_addr_info', split_double_address,
                               logger)
        if now_addr is None:
            continue
        # Previous address
        pre_addr = _parse_cell(i, 9, 'pre_addr_info', split_double_address,
                               logger)
        if pre_addr is None:
            continue

        # Join everything into the birth list, which becomes the output row
        birth_day.extend(lost_day)
        birth_day.extend(pre_addr)
        birth_day.extend(now_addr)

        # Write the row to the destination sheet
        ws_des.append(birth_day)
        logger.info(birth_day)
        # Saved after every appended row, as before, so the rows already
        # processed are on disk even if a later row raises.
        des_book.save('demo.xlsx')
    def __init__(self):
        """Constructor: initialise the attributes.

        Creates the log and the list of User-Agent strings kept in
        ``self.user_agent``.
        """
        self.log = MyLog("html_downloader", "logs")

        # NOTE(review): the first entry has no closing parenthesis after
        # "Trident/5.0" -- looks like a truncated User-Agent; confirm before
        # changing it, since the string is sent as-is.
        self.user_agent = [
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; InfoPath.2; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; 360SE) ",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0) ",
            "Mozilla/5.0 (Windows NT 5.1; zh-CN; rv:1.9.1.3) Gecko/20100101 Firefox/8.0",
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
            "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; TencentTraveler 4.0; .NET CLR 2.0.50727)",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
        ]
Beispiel #9
0
 def __init__(self):
     """Create the log and start the CSV data file with its header row."""
     self.log = MyLog("html_output", "logs")

     # Column titles of the second-hand housing table, in output order
     header = [
         "id", "小区名称", "所在区域", "成交时间", "总价",
         "单价", "房屋户型", "所在楼层", "建筑面积", "户型结构",
         "套内面积", "建筑类型", "房屋朝向", "建成年代", "装修情况",
         "建筑结构", "供暖方式", "梯户比例", "配备电梯", "链家编号",
         "交易权属", "挂牌时间", "房屋用途", "房屋年限", "房权所属",
     ]

     # Location and format of the data set being written
     filename = "dataout/secondhome.csv"
     with open(filename, "w", newline="") as csv_file:
         csv.writer(csv_file, dialect='excel').writerow(header)
Beispiel #10
0
    def __init__(self, title, link, graphical=None, parent=None):
        """Set up the tweet window for one item and show it.

        Args:
            title: Title of the item; html tags are removed before storing.
            link: Stored as ``self.link``.
            graphical: Stored as ``self.graphical``.
            parent: Optional parent; when given, its log ``l`` is reused,
                otherwise a log on "activity.log" is created.

        If the credentials file does not exist and ``openAuthPage()`` does
        not report success, construction stops early: the tweet text is not
        set and the window is not shown.
        """

        super(MyTwit, self).__init__(parent)

        self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

        self.parent = parent

        # Remove html tags from the title
        self.title = removeHtml(title)

        self.link = link
        self.graphical = graphical

        self.resource_dir, self.DATA_PATH = functions.getRightDirs()

        if parent is None:
            self.l = MyLog("activity.log")
        else:
            self.l = self.parent.l

        # NOTE(review): the consumer key and secret are hard-coded in the
        # source; consider loading them from configuration instead.
        self.CONSUMER_KEY = 'IaTVXKtZ7uBjzcVWzsVmMYKtP'
        self.CONSUMER_SECRET = '8hsz0Zj3CupFfvJMAhpG3UjMLs7HZjGywRsjRJI8IcjIA4NrEk'

        # File holding the user's credentials, under the data path
        self.MY_TWITTER_CREDS = self.DATA_PATH + '/config/twitter_credentials'

        self.initUI()
        self.defineSlots()

        # If no credentials, try to get them
        if not os.path.exists(self.MY_TWITTER_CREDS):
            authentified = self.openAuthPage()

            # No credentials obtained, exit
            if not authentified:
                return

        self.setTweetText()
        self.show()
Beispiel #11
0
        query.prepare("UPDATE papers SET percentage_match = ? WHERE id = ?")

        for id_bdd, percentage in zip(list_id, list_percentages):

            # Convert the percentage to a float, because the number is
            # probably a type used by numpy. MANDATORY
            params = (float(percentage), id_bdd)

            for value in params:
                query.addBindValue(value)

            query.exec_()

        if not self.bdd.commit():
            self.l.critical("Percentages match not correctly written in db")
        else:
            self.l.debug("Percentages written to db in {}".
                         format(datetime.datetime.now() - diff_time))

            self.l.debug("Done calculating match percentages in {}".
                         format(datetime.datetime.now() - start_time))

        self.calculated_something = True


if __name__ == "__main__":
    # Run as a script: build a Predictor with its own log on "test.log" and
    # an empty list as second argument, initialise the pipeline, then
    # calculate the match percentages.
    logger = MyLog("test.log")
    predictor = Predictor(logger, [])
    predictor.initializePipeline()
    predictor.calculatePercentageMatch()
Beispiel #12
0
import pymssql
import threading
import time
from config import DB_token
from log import MyLog

# Module-level MyLog instance, created at import time
log = MyLog()


def judge_phase(content):
    guess = []

    key_state = {
        '打包': '途中',
        '发出': '途中',
        '收入': '途中',
        '发往': '途中',
        '到达': '途中',
        '到件扫描': '途中',
        '称重扫描': '途中',
        '进行分拨': '途中',
        '【反馈】扫描': '途中',
        '离开': '途中',
        '卸车扫描': '途中',
        '【称重】扫描': '途中',
        '【到件】扫描': '途中',
        '【卸车】扫描': '途中',
        '【分发】扫描': '途中',
        '快件扫描': '途中',
        '已拆包': '途中',
        '已收寄': '途中',
Beispiel #13
0
 def __init__(self):
     """Create the URL manager, parser, downloader, log and output objects."""
     self.urls = UrlManager()
     self.parser = HtmlParser()
     self.downloader = UrlDownloader()
     self.log = MyLog("spider", "logs")
     self.output = HtmlOutPut()
Beispiel #14
0
import validators
import datetime
from pprint import pprint

import hosts
from log import MyLog

# Presumably the number of entries sampled per test -- TODO confirm against
# the tests that use it
LENGTH_SAMPLE = 3

# Header fields (User-agent, Connection); presumably sent with the HTTP
# requests made by the tests -- verify against the callers
HEADERS = {
    'User-agent':
    'Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0',
    'Connection': 'close'
}

# Log of this test module, created at import time with mode='w'; a marker
# line is written at the start of every run
l = MyLog("output_tests_hosts.log", mode='w')
l.debug("---------------------- START NEW RUN OF TESTS ----------------------")


def logAssert(test, msg):
    """Assert ``test``, logging ``msg`` as an error first when it fails.

    Nothing happens when ``test`` is truthy. Otherwise ``msg`` is written to
    the module-level log ``l`` and the assertion on ``test`` is made with
    ``msg`` as its message.

    Idea taken from:
    http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file
    """
    if test:
        return

    l.error(msg)
    assert test, msg


def test_reject():
    """Test each entry in a sample of rejectable articles"""
 def __init__(self):
     """Create the log used by this object."""
     # Write to the log, marking it as belonging to the html parsing
     self.log = MyLog("html_parser", "logs")
Beispiel #16
0
Start the tests with something like this:
py.test -xs test_hosts.py -k getData
"""

import os
import requests
import pytest
import validators
from bs4 import BeautifulSoup, SoupStrainer

from log import MyLog

# Number of ACS journals registered
NBR_ACS_JOURNALS = 55

# Log of this test module, created at import time with mode='w'; a marker
# line is written at the start of every run
l = MyLog("output_tests_feeds.log", mode='w')
l.debug("---------------------- START NEW RUN OF TESTS ----------------------")


def logAssert(test, msg):
    """Assert ``test``, logging ``msg`` as an error first when it fails.

    Nothing happens when ``test`` is truthy. Otherwise ``msg`` is written to
    the module-level log ``l`` and the assertion on ``test`` is made with
    ``msg`` as its message.

    Idea taken from:
    http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file
    """
    if test:
        return

    l.error(msg)
    assert test, msg


def test_ACSFeeds():
    """Function to test we have the right number of ACS journals"""
Beispiel #17
0
 def __init__(self):
     """Constructor: initialise the attributes (creates the log)."""
     self.log = MyLog("html_parser", "logs")
"""
Test module to be run with pytest.

Start the tests with something like this:
py.test -xs test_hosts.py -k getData
"""

import os
import pytest

from log import MyLog
from advanced_search import AdvancedSearch
from wizard_add_journal import WizardAddJournal
from wizard_del_journal import WizardDelJournal

# Log of this test module, created at import time with mode='w'; a marker
# line is written at the start of every run
l = MyLog("output_tests_gui_components.log", mode='w')
l.debug("---------------------- START NEW RUN OF TESTS ----------------------")


def logAssert(test, msg):
    """Assert ``test``, logging ``msg`` as an error first when it fails.

    Nothing happens when ``test`` is truthy. Otherwise ``msg`` is written to
    the module-level log ``l`` and the assertion on ``test`` is made with
    ``msg`` as its message.

    Idea taken from:
    http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file
    """
    if test:
        return

    l.error(msg)
    assert test, msg


# def test_WizardJournal(qtbot):