def code_pro_dis():
    """Translate the address-name columns of the source sheet into codes.

    For every row of sheet ``Sheet`` in the hard-coded source workbook,
    the values in columns 7-10 have their spaces removed and are looked
    up with ``get_code`` against the ``province`` / ``city`` tables
    (both defined outside this function).  The four results are written
    to columns 0-3 of a new ``Worksheet`` sheet, and the destination
    workbook is saved to ``demo.xls`` after each row.

    An ``IOError`` raised while handling a row is logged and the loop
    moves on to the next row.
    """
    # Source workbook.
    src_book = xlrd.open_workbook(r'E:\标签化测试.xlsx')
    ws_src = src_book.sheet_by_name('Sheet')

    # Destination workbook (utf-8 encoded) with a single worksheet.
    des_book = xlwt.Workbook(encoding='utf-8')
    ws_des = des_book.add_sheet('Worksheet')

    # Log file.
    log = MyLog.MyLog("test")
    logger = log.init_logger()

    for i in range(0, ws_src.nrows):
        try:
            # One row of the source sheet.
            row_values = ws_src.row_values(i)

            # Address names with embedded spaces removed.
            src_province_name = row_values[7].replace(' ', '')
            src_distinct_name = row_values[8].replace(' ', '')
            now_province_name = row_values[9].replace(' ', '')
            now_distinct_name = row_values[10].replace(' ', '')

            ws_des.write(i, 0, get_code(province, src_province_name))
            ws_des.write(i, 1, get_code(city, src_distinct_name))
            ws_des.write(i, 2, get_code(province, now_province_name))
            ws_des.write(i, 3, get_code(city, now_distinct_name))

            # Saved after every row, so demo.xls reflects the rows
            # processed so far.
            des_book.save('demo.xls')
        except IOError:
            # `i` is an int: it must be formatted, not concatenated,
            # otherwise the handler itself raises TypeError.
            logger.error("{}行出错".format(i))
        logger.info(i)
def __init__(self):
    """Constructor: create the collaborators used by the spider.

    Instantiates, in this order, the URL manager, the logger
    (``"spider_main"`` / ``"logs"``), the downloader, the parser and
    the outputer, and stores each one on the instance.
    """
    # URL bookkeeping and logging come first.
    self.urls = UrlManager()
    self.log = MyLog("spider_main", "logs")

    # Fetch -> parse -> output components.
    self.downloader = HtmlDownloader()
    self.parser = HtmlParser()
    self.outputer = HtmlOutputer()
def __init__(self, parent=None):
    """Build the modal journal-deletion wizard.

    Args:
        parent: Owning widget.  When given, its logger (``parent.l``)
            and ``parent.options`` are reused.  When ``None`` the
            dialog runs standalone: it creates its own logger and a
            dummy settings file, and ``self.test`` is set to ``True``.
    """
    super(WizardDelJournal, self).__init__(parent)

    self.setModal(True)
    self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

    self.parent = parent
    self.resource_dir, self.DATA_PATH = functions.getRightDirs()

    standalone = parent is None
    if standalone:
        self.l = MyLog("activity.log")
        # Dummy file for saving if testing
        self.options = QtCore.QSettings(
            "debug/options.ini", QtCore.QSettings.IniFormat
        )
    else:
        self.l = parent.l
        self.options = parent.options
    self.test = standalone

    # Checkboxes of the window are collected here.
    self.check_journals = []

    self.initUI()
    self.defineSlots()
def __init__(self, parent=None):
    """Build the advanced-search window.

    Args:
        parent: Owning widget.  When given, its logger (``parent.l``)
            is reused.  When ``None`` the module is running standalone:
            a dedicated logger is created and ``self.test`` is ``True``.

    The saved searches are read from ``/config/searches.ini`` under the
    data directory returned by ``functions.getRightDirs()``.
    """
    super(AdvancedSearch, self).__init__(parent)
    self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

    self.parent = parent
    self.resource_dir, data_path = functions.getRightDirs()

    # Use a specific logger when the module is started standalone.
    standalone = parent is None
    self.logger = MyLog("activity.log") if standalone else parent.l
    self.test = standalone

    searches_ini = data_path + "/config/searches.ini"
    self.options = QtCore.QSettings(searches_ini, QtCore.QSettings.IniFormat)

    # Holds the lineEdit widgets carrying the search-field values.
    self.fields_list = []

    self.initUI()
    self.defineSlots()
    self.restoreSettings()
def __init__(self):
    """Constructor: create the logger and start a fresh output CSV.

    Opens ``output\\ershoufang.csv`` in write mode (truncating any
    existing file) and writes the single header row.
    """
    self.log = MyLog("html_outputer", "logs")

    # Column titles of the output table, in file order.
    header = [
        "id",
        "小区名称",
        "所在区域",
        "总价",
        "单价",
        "房屋户型",
        "所在楼层",
        "建筑面积",
        "户型结构",
        "套内面积",
        "建筑类型",
        "房屋朝向",
        "建筑结构",
        "装修情况",
        "梯户比例",
        "配备电梯",
        "产权年限",
        "挂牌时间",
        "交易权属",
        "上次交易",
        "房屋用途",
        "房屋年限",
        "产权所属",
        "抵押信息",
        "房本备件",
    ]

    with open("output\\ershoufang.csv", "w", newline="") as csv_file:
        csv.writer(csv_file, dialect='excel').writerow(header)
def __init__(self, parent=None):
    """Build the modal sign-in dialog and request a captcha.

    Args:
        parent: Owning widget.  When given, its logger (``parent.l``)
            is reused.  When ``None`` the dialog runs standalone with
            its own logger and ``self.test`` set to ``True``.
    """
    super(Signing, self).__init__(parent)
    self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

    self.parent = parent
    self.resource_dir, self.DATA_PATH = functions.getRightDirs()

    standalone = parent is None
    self.logger = MyLog("activity.log") if standalone else parent.l
    self.test = standalone

    # Becomes meaningful once a login attempt has been checked.
    self.validated = False

    self.setModal(True)

    self.initUI()
    self.defineSlots()
    self.getCaptcha()
def scanner_file():
    """Parse each data row of the source sheet and append it to the output.

    Works on ``ws_src``, ``ws_des`` and ``des_book``, which are defined
    outside this function.  Rows are read from row 2 up to
    ``ws_src.max_row``.  For each row:

    * column 2 is the case id; a row without one is logged and skipped;
    * columns 5 and 7 are parsed with ``split_double_time``;
    * columns 8 and 9 are parsed with ``split_double_address``.

    When a parser returns ``None`` the raw cell value is handed to
    ``write_to_file``, the problem is logged and the row is skipped.
    Otherwise the output row is the column-5 result followed by the
    column-7, column-9 and column-8 results, in that order.

    The destination workbook is saved to ``demo.xlsx`` when the loop
    ends, including when a row raises, so rows already appended are
    not lost.
    """
    log = MyLog.MyLog("test")
    logger = log.init_logger()

    try:
        for i in range(2, ws_src.max_row + 1):
            # Case id.
            case_id = ws_src.cell(row=i, column=2).value
            if case_id is None:
                # Values are formatted rather than concatenated: `i` is
                # an int and `case_id` is None on this branch.
                logger.error('{}====case_id:{}'.format(i, case_id))
                continue

            # Birth information (column 5).
            birthInfo = ws_src.cell(row=i, column=5).value
            birthDay = split_double_time(birthInfo)
            if birthDay is None:
                write_to_file(birthInfo)
                logger.error('{}====birthInfo:{}'.format(i, birthInfo))
                continue

            # Disappearance information (column 7).
            lostInfo = ws_src.cell(row=i, column=7).value
            lostDay = split_double_time(lostInfo)
            if lostDay is None:
                write_to_file(lostInfo)
                logger.error('{}====lostInfo:{}'.format(i, lostInfo))
                continue

            # Current address (column 8).
            now_addr_info = ws_src.cell(row=i, column=8).value
            nowaddr = split_double_address(now_addr_info)
            if nowaddr is None:
                write_to_file(now_addr_info)
                logger.error(
                    '{}====now_addr_info:{}'.format(i, now_addr_info)
                )
                continue

            # Previous address (column 9).
            pre_addr_info = ws_src.cell(row=i, column=9).value
            preAddr = split_double_address(pre_addr_info)
            if preAddr is None:
                write_to_file(pre_addr_info)
                logger.error(
                    '{}====pre_addr_info:{}'.format(i, pre_addr_info)
                )
                continue

            # Assemble the output row in place on top of birthDay.
            birthDay.extend(lostDay)
            birthDay.extend(preAddr)
            birthDay.extend(nowaddr)

            # Append the row to the destination sheet.
            ws_des.append(birthDay)
            logger.info(birthDay)
    finally:
        des_book.save('demo.xlsx')
def __init__(self):
    """Constructor: create the logger and the User-Agent pool.

    ``self.user_agent`` is a list of User-Agent header strings.
    """
    self.log = MyLog("html_downloader", "logs")

    agents = []
    # Internet Explorer based strings.
    agents.append("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0")
    agents.append("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; InfoPath.2; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; 360SE) ")
    agents.append("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0) ")
    # Firefox and Safari.
    agents.append("Mozilla/5.0 (Windows NT 5.1; zh-CN; rv:1.9.1.3) Gecko/20100101 Firefox/8.0")
    agents.append("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50")
    # Maxthon, Opera, TencentTraveler.
    agents.append("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)")
    agents.append("Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11")
    agents.append("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; TencentTraveler 4.0; .NET CLR 2.0.50727)")
    # Chrome.
    agents.append("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36")

    self.user_agent = agents
def __init__(self):
    """Create the logger and start a fresh output CSV with its header.

    Opens ``dataout/secondhome.csv`` in write mode (truncating any
    existing file) and writes the single header row.
    """
    self.log = MyLog("html_output", "logs")

    # Column titles of the second-hand housing table, in file order.
    columns = [
        "id", "小区名称", "所在区域", "成交时间", "总价",
        "单价", "房屋户型", "所在楼层", "建筑面积", "户型结构",
        "套内面积", "建筑类型", "房屋朝向", "建成年代", "装修情况",
        "建筑结构", "供暖方式", "梯户比例", "配备电梯", "链家编号",
        "交易权属", "挂牌时间", "房屋用途", "房屋年限", "房权所属",
    ]

    # Location and format of the data set.
    target = "dataout/secondhome.csv"
    with open(target, "w", newline="") as out:
        csv.writer(out, dialect='excel').writerow(columns)
def __init__(self, title, link, graphical=None, parent=None):
    """Build the tweet dialog for one article and show it.

    Args:
        title: Article title; html tags are stripped with ``removeHtml``.
        link: Stored unchanged on ``self.link``.
        graphical: Stored unchanged on ``self.graphical``.
        parent: Owning widget.  When given, its logger (``parent.l``)
            is reused; otherwise a dedicated logger is created.

    If the credentials file does not exist, ``openAuthPage`` is called
    to obtain credentials.  When that returns a falsy value the
    constructor returns early, without setting the tweet text or
    showing the dialog.
    """
    super(MyTwit, self).__init__(parent)
    self.setAttribute(QtCore.Qt.WA_DeleteOnClose)

    self.parent = parent

    # The title is kept without its html tags.
    self.title = removeHtml(title)
    self.link = link
    self.graphical = graphical

    self.resource_dir, self.DATA_PATH = functions.getRightDirs()

    self.l = MyLog("activity.log") if parent is None else parent.l

    # NOTE(review): API credentials are embedded in the source; consider
    # loading them from configuration instead.
    self.CONSUMER_KEY = 'IaTVXKtZ7uBjzcVWzsVmMYKtP'
    self.CONSUMER_SECRET = '8hsz0Zj3CupFfvJMAhpG3UjMLs7HZjGywRsjRJI8IcjIA4NrEk'
    self.MY_TWITTER_CREDS = self.DATA_PATH + '/config/twitter_credentials'

    self.initUI()
    self.defineSlots()

    # Without stored credentials, try to obtain them; give up if that fails.
    if not os.path.exists(self.MY_TWITTER_CREDS) and not self.openAuthPage():
        return

    self.setTweetText()
    self.show()
query.prepare("UPDATE papers SET percentage_match = ? WHERE id = ?") for id_bdd, percentage in zip(list_id, list_percentages): # Convert the percentage to a float, because the number is # probably a type used by numpy. MANDATORY params = (float(percentage), id_bdd) for value in params: query.addBindValue(value) query.exec_() if not self.bdd.commit(): self.l.critical("Percentages match not correctly written in db") else: self.l.debug("Percentages written to db in {}". format(datetime.datetime.now() - diff_time)) self.l.debug("Done calculating match percentages in {}". format(datetime.datetime.now() - start_time)) self.calculated_something = True if __name__ == "__main__": logger = MyLog("test.log") predictor = Predictor(logger, []) predictor.initializePipeline() predictor.calculatePercentageMatch()
import pymssql import threading import time from config import DB_token from log import MyLog log = MyLog() def judge_phase(content): guess = [] key_state = { '打包': '途中', '发出': '途中', '收入': '途中', '发往': '途中', '到达': '途中', '到件扫描': '途中', '称重扫描': '途中', '进行分拨': '途中', '【反馈】扫描': '途中', '离开': '途中', '卸车扫描': '途中', '【称重】扫描': '途中', '【到件】扫描': '途中', '【卸车】扫描': '途中', '【分发】扫描': '途中', '快件扫描': '途中', '已拆包': '途中', '已收寄': '途中',
def __init__(self):
    """Create the collaborators used by the spider.

    Instantiates, in this order, the URL manager, the parser, the
    downloader, the logger (``"spider"`` / ``"logs"``) and the output
    writer, and stores each one on the instance.
    """
    # Crawl pipeline components.
    self.urls = UrlManager()
    self.parser = HtmlParser()
    self.downloader = UrlDownloader()

    # Logging and output.
    self.log = MyLog("spider", "logs")
    self.output = HtmlOutPut()
import validators
import datetime
from pprint import pprint

import hosts
from log import MyLog


LENGTH_SAMPLE = 3

HEADERS = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0',
    'Connection': 'close'
}

# The log file is opened with mode='w' and a banner marks the new run.
l = MyLog("output_tests_hosts.log", mode='w')
l.debug("---------------------- START NEW RUN OF TESTS ----------------------")


def logAssert(test, msg):
    """Assert ``test``, logging ``msg`` as an error first when it is falsy.

    http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file
    """
    if test:
        return

    l.error(msg)
    assert test, msg


def test_reject():
    """Test each entry in a sample of rejectable articles"""
def __init__(self):
    """Create the logger for this component.

    The logger is named ``"html_parser"`` so that log entries are
    marked as coming from html parsing.
    """
    self.log = MyLog("html_parser", "logs")
Start the tests with something like this: py.test -xs test_hosts.py -k getData """ import os import requests import pytest import validators from bs4 import BeautifulSoup, SoupStrainer from log import MyLog # Nbr of ACS journals registered NBR_ACS_JOURNALS = 55 l = MyLog("output_tests_feeds.log", mode='w') l.debug("---------------------- START NEW RUN OF TESTS ----------------------") def logAssert(test, msg): """Function to log the result of an assert http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file """ if not test: l.error(msg) assert test, msg def test_ACSFeeds(): """Function to test we have the right number of ACS journals"""
def __init__(self):
    """Constructor: initialise the instance attributes.

    The only attribute is ``self.log``, a ``MyLog`` created with the
    arguments ``"html_parser"`` and ``"logs"``.
    """
    self.log = MyLog("html_parser", "logs")
"""
Test module to be run with pytest.

Start the tests with something like this:
py.test -xs test_hosts.py -k getData
"""

import os
import pytest

from log import MyLog
from advanced_search import AdvancedSearch
from wizard_add_journal import WizardAddJournal
from wizard_del_journal import WizardDelJournal


# The log file is opened with mode='w' and a banner marks the new run.
l = MyLog("output_tests_gui_components.log", mode='w')
l.debug("---------------------- START NEW RUN OF TESTS ----------------------")


def logAssert(test, msg):
    """Assert ``test``, logging ``msg`` as an error first when it is falsy.

    http://stackoverflow.com/questions/24892396/py-test-logging-messages-and-test-results-assertions-into-a-single-file
    """
    if test:
        return

    l.error(msg)
    assert test, msg


# def test_WizardJournal(qtbot):