def __init__(self):
    """Initialise the player: clear the screen, show usage tips, set up
    logging and default state, then resolve the MV play URL.

    NOTE(review): ``clear()`` and ``self.tip()`` run before any state is
    set, so they must not depend on the attributes below.
    """
    clear()
    self.tip()
    self.log = MyLog()
    # Spelling 'unknow' kept as-is: it is a runtime value other code may
    # compare against -- confirm before correcting it to 'unknown'.
    self.title = 'unknow'
    self.packageSize = 1024 * 1024  # download chunk size: 1 MiB
    self.mvPlayUrl = self.getMvPlayUrl()
def getFtpFileMain(self):
    """Interactively fetch test-log archives from the FTP server.

    Loops forever: prompts for a serial number (or, if the input contains
    a '/', treats it as a path to a file of serial numbers), resolves the
    child/parent SN pair, locates the candidate log folders on the FTP
    server, and downloads every file whose name contains the SN and the
    requested log type.

    NOTE(review): this source arrived with its line structure collapsed;
    the indentation below is a reconstruction. In particular, placing
    ``if len(childFolderList) == 1: break`` inside the ``except`` clause
    is the only arrangement consistent with the single ``try`` and the
    trailing ``finally: pass`` in the text -- confirm against the
    original file.
    """
    while True:
        ml = MyLog()
        scanSN = ScanSernum()
        SN = scanSN.scanMain()
        snlist = []
        if "/" in SN: #Input file: a path was given, read one SN per line
            with open(SN,'r') as snfile:
                for asn in snfile.readlines():
                    if "FDO" in asn:  # keep only FDO-prefixed serial numbers
                        snlist.append(asn.strip())
        else:
            snlist.append(SN)
        myarea = ScanAreaType()
        logarea = myarea.askArea()
        logtype = myarea.asklogType().lower()
        if "&" in logtype:
            logtype = "FDO" #Copy mview and mtype
        else:
            logtype = logtype[1:] #the logtype finally value is type or view
        # Files downloaded in this pass (not consumed further in this view).
        logList = []
        for sn in snlist:
            pcsn = GetPCSN()
            pcsntuple = pcsn.getMain(sn)
            childsn = pcsntuple[0]
            parentsn = pcsntuple[1]
            self.chDir()
            getlogpath = GetLogPath()
            childFolderList = getlogpath.getMain(parentsn,childsn,logarea)
            input_str = '>>>>Start find SN: %s/%s test log in ftp server'%(childsn, parentsn)
            ml.info(input_str)
            print UseStyle(input_str, fore='blue')
            for childFolder in childFolderList:
                if "KFCR" in childFolder:
                    # Presumably the folder path is <site>/<year>/<...>/WK<week>...
                    # (year at index 1, week digits after 2 chars at index 3) -- verify.
                    KFCRYear = int(childFolder.split('/')[1])
                    KFCRWeek = int(childFolder.split('/')[3][2:])
                    #Rename the KFCR log name during copying it to ftp server on 2018/3/2
                    if KFCRYear <= 2018 and KFCRWeek < 10 or KFCRYear == 2017:
                        logtype = "FDO" #Both mtype and mview will in zip file, eg:FDO2114B0KD_1491454226.zip
                myftp = LoginFTP()
                ftp = myftp.ftpserver()
                ftp.cwd('/') #Enter the FTP top folder
                ftp.cwd(self.baseFtpFolder)
                try:
                    ftp.cwd(childFolder)
                    for fileList in ftp.nlst():
                        # Match: file belongs to this SN (child or parent) AND to the wanted log type.
                        if (childsn in fileList or parentsn in fileList) and logtype in fileList:
                            input_str = '--->Copy file:%s to %s'%(fileList,os.getcwd())
                            print UseStyle(input_str, fore='blue')
                            ml.info(input_str)
                            if not os.path.lexists(fileList): #Log already exist in /usr/auto/testlog
                                mydlfile = DLFtpFile()
                                mydlfile.downloadFile(ftp, fileList)
                            logList.append(fileList)
                except Exception, err: #The script still running although met error
                    outStr = r"!!!No file in C:/Backup/BU3/%s in FTP server, ERR:%s"%(childFolder,err)
                    print UseStyle(outStr, fore='black', back='yellow')
                    if len(childFolderList) == 1:
                        break
                finally:
                    pass
def process_item(self, item, spider):
    """Persist one weather item into the MySQL ``weather`` table.

    :param item: scraped item with keys cityName/img/week/weather/shidu/air
    :param spider: the spider that produced the item (unused)
    :return: the item, unchanged, for the next pipeline stage

    FIX: the original never closed the connection (and leaked the cursor)
    when ``execute`` raised; cleanup is now guaranteed via try/finally.
    """
    m1 = MyLog()
    cityName = item['cityName'].encode('utf8')
    img = os.path.basename(item['img'])  # store only the image file name
    week = item['week'].encode('utf8')
    weather = item['weather'].encode('utf8')
    shidu = item['shidu'].encode('utf8')
    air = item['air'].encode('utf8')
    m1.info('进行mysql存储')
    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='******',
        password='******',
        db='scrapyDB',
        charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            # Parameterized query: values are escaped by the driver.
            cur.execute("insert into weather(cityName,img,week,weather,shidu,air) values(%s,%s,%s,%s,%s,%s)",
                        (cityName, img, week, weather, shidu, air))
        finally:
            cur.close()
        conn.commit()
    finally:
        conn.close()
    m1.info('mysql存储完成')
    return item
def __init__(self, username, password): self.log = MyLog() #获得打印日志对象 self.username = username self.password = password self.driver = webdriver.Chrome() self.driver.implicitly_wait(5) #静静等待10s self.isLogin = 0 self.uid = ""
def process_item(self, item, spider):
    """Append one item as a JSON line to ``weather<YYYYMMDD>.json``.

    :param item: scraped item (any dict-convertible mapping)
    :param spider: the spider that produced the item (unused)
    :return: the item, unchanged, for the next pipeline stage

    FIX: routine start/finish progress was logged at error/warn severity,
    polluting the error log; it is now logged at info, consistent with
    the other pipelines in this project.
    """
    m1 = MyLog()
    today = time.strftime('%Y%m%d', time.localtime())
    fileName = 'weather' + today + '.json'
    m1.info('转换json开始')
    with codecs.open(fileName, 'a', encoding='utf8') as fp:
        # One JSON object per line; keep non-ASCII text readable.
        line = json.dumps(dict(item), ensure_ascii=False) + '\n'
        fp.write(line)
    m1.info('转换json结束')
    return item
def process_item(self, item, spider):
    """Append one weather item to ``weather<YYYYMMDD>.txt`` and cache its icon.

    :param item: scraped item with keys cityName/weather/img/shidu/air
    :param spider: the spider that produced the item (unused)
    :return: the item, unchanged, for the next pipeline stage

    FIX: the original opened the downloaded image file as ``fp`` too,
    shadowing the text-file handle; the writes after the inner ``with``
    then hit a closed file and raised ValueError. The image download now
    uses its own handle.
    """
    m1 = MyLog()
    today = time.strftime('%Y%m%d', time.localtime())
    fileName = 'weather' + today + '.txt'
    m1.info('同步开始')
    with open(fileName, 'a') as fp:
        fp.write(item['cityName'].encode('utf-8') + '\t')
        fp.write(item['weather'].encode('utf-8') + '\t')
        imgName = os.path.basename(item['img'])
        fp.write(imgName + '\t')
        # Download the weather icon only if it is not cached locally.
        if not os.path.exists(imgName):
            with open(imgName, 'wb') as imgFile:
                response = urllib2.urlopen(item['img'])
                imgFile.write(response.read())
        fp.write(item['shidu'].encode('utf-8') + '\t')
        fp.write(item['air'].encode('utf-8') + '\n\n')
        time.sleep(1)  # throttle to be polite to the image host
    m1.info('同步结束')
    return item
def __init__(self):
    """Create the logger, then run every time-module demo in sequence."""
    self.log = MyLog()
    self.testTime()
    self.testLocaltime()
    self.testSleep()
    self.testStrftime()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Smoke-test the MyLog wrapper by emitting one message at every level."""
from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    for level in ('debug', 'info', 'warn', 'error', 'critical'):
        # Same five calls and message texts as before, dispatched by name.
        getattr(ml, level)("I am the %s message" % level)
# coding:utf-8
from myLog import MyLog

mylog = MyLog()


def testMylog():
    """Deliberately divide by zero and route the error through MyLog."""
    try:
        result = 1 / 0
    except ZeroDivisionError as err:
        mylog.error("def testMylog " + str(err))


if __name__ == '__main__':
    testMylog()
def testLog():
    """Emit one debug and one error message through a fresh MyLog."""
    logger = MyLog()
    logger.debug('it is debug')
    logger.error("I'm error")
def delete_proxy(self, proxy):
    """Ask the local proxy-pool service to drop a dead proxy.

    FIX: the original line was textually corrupted (duplicated string
    fragments); reconstructed to the obvious single GET request.
    """
    requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))


def get_html(self, url):
    """Fetch *url* through a pooled HTTP proxy.

    Retries up to 5 times with the same proxy; if every attempt fails,
    the proxy is dropped from the pool and the whole fetch restarts with
    a fresh one.

    NOTE(review): the helper names were garbled in the source
    (``get_pro_proxy``/``delete_pro_proxy``/``get_htm_html``); assumed to
    be ``get_proxy``/``delete_proxy``/``get_html`` -- confirm against the
    proxy-pool class.
    """
    retry_count = 5
    proxy = self.get_proxy()
    print(proxy)
    while retry_count > 0:
        try:
            # Fetch through the proxy (原注释: 使用代理访问).
            html = requests.get(url, proxies={"http": "http://{}".format(proxy)})
            return html.text
        except Exception:
            retry_count -= 1
    # Failed 5 times: remove the bad proxy from the pool and start over.
    self.delete_proxy(proxy)
    return self.get_html(url)


logger = MyLog()


def get_html(url):
    """Download *url* directly (no proxy); return the page text, or an
    error marker string on any failure (logged)."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        logger.error('get_html出错页面为: ' + url)
        return " ERROR when get html"


def get_content(url):
    """Scrape forum name, publish time and topic from one post page.

    FIXES vs. original:
    - concatenating the parsed ``datetime`` to a str raised TypeError;
      it is now formatted with ``str()`` before printing;
    - a stray ``topic = None`` on the success path discarded the topic
      that had just been scraped; removed;
    - ``print(all_neirong)`` referenced a name defined only in
      commented-out code and raised NameError; removed with the dead code.
    """
    print("当前爬取的网页为" + url)
    soup = BeautifulSoup(get_html(url), 'lxml')
    try:
        forum_name = soup.find('p', attrs={'class': "nrbt"}).a.text
        print("当前爬取的论坛名为:" + forum_name)
    except Exception:
        logger.error("get_forum_name出错" + "页面为" + url)
        forum_name = None
    try:
        time = soup.find('p', attrs={'class': "fbsj"}).text[4:]
        time = datetime.strptime(time, '%Y-%m-%d %H:%M')
        print("这个帖子的发表时间为:" + str(time))
    except Exception:
        logger.error("get_publish_time出错" + "页面为" + url)
        time = None
    try:
        topic = soup.find('div', attrs={'class': "nr_r_c"}).find('p', attrs={'class': "contitle"}).text
        print("帖子的主题为:" + topic)
    except Exception:
        logger.error("get_topic出错" + "页面为" + url)
        topic = None


if __name__ == '__main__':
    # Guarded so importing this module no longer triggers a network crawl.
    get_content("http://bbs.12365auto.com/postcontent.aspx?tID=133692&sId=1147&ppage=1&from=s")