Exemple #1
0
 def __init__(self):
     """Run the start-up helpers and set the instance defaults.

     The statement order is kept as written: ``clear()`` and ``self.tip()``
     run before any attribute is assigned.
     """
     # External helper; presumably clears the console -- TODO confirm.
     clear()
     self.tip()
     self.log = MyLog()
     # Placeholder title used until a real one is assigned elsewhere.
     self.title = 'unknow'
     # 1024 * 1024 = 1048576; presumably a size in bytes -- TODO confirm.
     self.packageSize = 1024 * 1024
     self.mvPlayUrl = self.getMvPlayUrl()
Exemple #2
0
    def getFtpFileMain(self):
        """Look up test logs for scanned serial numbers on the FTP server and download them.

        Each pass of the outer loop:
          1. Reads a serial number via ``ScanSernum``; when the input contains
             "/", it is treated as a path to a file and every line containing
             "FDO" is taken as a serial number.
          2. Asks for the area and log type via ``ScanAreaType``.
          3. For every serial number, resolves the child/parent pair via
             ``GetPCSN`` and the candidate FTP folders via ``GetLogPath``.
          4. Logs in to the FTP server for each folder, and downloads every
             matching file that does not already exist in the current working
             directory. Every matching file name is appended to ``logList``.

        NOTE(review): this is Python 2 code (``print`` statement,
        ``except Exception, err``) with mixed tab/space indentation.
        NOTE(review): the visible snippet ends inside the ``while True`` loop;
        ``logList`` is never returned here -- the rest of the method is
        presumably outside this view.
        """
        while True:
            ml = MyLog()
            scanSN = ScanSernum()
            SN = scanSN.scanMain()
            snlist = []
            if "/" in SN:	#Input is a file path: collect every line that contains "FDO"
                with open(SN,'r') as snfile:
                   for asn in snfile.readlines():
                       if "FDO" in asn:
                           snlist.append(asn.strip())
            else:
                snlist.append(SN)
            myarea = ScanAreaType()
            logarea = myarea.askArea()
            logtype = myarea.asklogType().lower()
            if "&" in logtype:
                logtype = "FDO"   #Copy mview and mtype
            else:
                logtype = logtype[1:]   #the logtype finally value is type or view
	    logList = []
            for sn in snlist:
	            pcsn = GetPCSN()
	            pcsntuple = pcsn.getMain(sn)
	            #getMain() result is indexed as (child SN, parent SN)
	            childsn = pcsntuple[0]
	            parentsn = pcsntuple[1]
	            self.chDir()
	            getlogpath = GetLogPath()
	            childFolderList = getlogpath.getMain(parentsn,childsn,logarea)
	            input_str = '>>>>Start find SN: %s/%s test log in ftp server'%(childsn, parentsn)
	            ml.info(input_str)
	            print UseStyle(input_str, fore='blue')
	            for childFolder in childFolderList:
	                if "KFCR" in childFolder:
	                    #Path component 1 is parsed as the year, component 3 (minus its first two characters) as the week
	                    KFCRYear = int(childFolder.split('/')[1])
	                    KFCRWeek = int(childFolder.split('/')[3][2:])
	                    #Rename the KFCR log name during copying it to ftp server on 2018/3/2
	                    #NOTE(review): parsed as (year <= 2018 and week < 10) or year == 2017 -- confirm this precedence is intended
	                    if KFCRYear <= 2018 and KFCRWeek < 10 or KFCRYear == 2017:
	                        #NOTE(review): logtype stays "FDO" for all later folders and serial numbers -- confirm
	                        logtype = "FDO"    #Both mtype and mview will in zip file, eg:FDO2114B0KD_1491454226.zip
	                #NOTE(review): a new FTP login is made per folder and is not closed in the code visible here
	                myftp = LoginFTP()
	                ftp = myftp.ftpserver()
                        ftp.cwd('/')	#Enter the FTP top folder
	                ftp.cwd(self.baseFtpFolder)
	                try:
	                    ftp.cwd(childFolder)
	                    for fileList in ftp.nlst():
	                        #Match on either serial number plus the selected log type
	                        if (childsn in fileList or parentsn in fileList) and logtype in fileList:
	                            input_str = '--->Copy file:%s to %s'%(fileList,os.getcwd())
	                            print UseStyle(input_str, fore='blue')
	                            ml.info(input_str)
				    if not os.path.lexists(fileList):    #Download only when the file is not already present in the current directory
	                                mydlfile = DLFtpFile()
	                                mydlfile.downloadFile(ftp, fileList)
	                            logList.append(fileList)
	                except Exception, err:	#The script still running although met error
	                    outStr = r"!!!No file in C:/Backup/BU3/%s in FTP server, ERR:%s"%(childFolder,err)
	                    print UseStyle(outStr, fore='black', back='yellow')
	                    #With a single candidate folder there is nothing else to try
	                    if len(childFolderList) == 1: break
	                finally:
                            pass
Exemple #3
0
    def process_item(self, item, spider):
        """Insert one weather item into the MySQL ``weather`` table.

        Args:
            item: mapping with the keys ``cityName``, ``img``, ``week``,
                ``weather``, ``shidu`` and ``air``. All values except ``img``
                are encoded to UTF-8 before being stored; only the base name
                of ``img`` is stored.
            spider: the calling spider (unused).

        Returns:
            The unchanged ``item``.

        Raises:
            Whatever ``MySQLdb`` raises on connection or query failure. The
            cursor and the connection are closed in that case as well.
        """
        m1 = MyLog()
        cityName = item['cityName'].encode('utf8')
        img = os.path.basename(item['img'])
        week = item['week'].encode('utf8')
        weather = item['weather'].encode('utf8')
        shidu = item['shidu'].encode('utf8')
        air = item['air'].encode('utf8')

        m1.info('进行mysql存储')

        conn = MySQLdb.connect(
            host='localhost',
            port=3306,
            user='******',
            password='******',
            db='scrapyDB',
            charset='utf8'
        )
        # try/finally guarantees the cursor and the connection are released
        # even when the INSERT or the commit fails.
        try:
            cur = conn.cursor()
            try:
                # Parameterized query: the driver escapes the values.
                cur.execute(
                    "insert into weather(cityName,img,week,weather,shidu,air) values(%s,%s,%s,%s,%s,%s)",
                    (cityName, img, week, weather, shidu, air)
                )
            finally:
                cur.close()
            conn.commit()
        finally:
            conn.close()

        m1.info('mysql存储完成')
        return item
Exemple #4
0
 def __init__(self, username, password):
     """Store the credentials and start a Chrome WebDriver session.

     Args:
         username: account name kept on the instance.
         password: account password kept on the instance.
     """
     # Logger object used for printing log messages.
     self.log = MyLog()
     self.username, self.password = username, password
     self.driver = webdriver.Chrome()
     # Implicit wait of 5 seconds for element lookups.
     self.driver.implicitly_wait(5)
     # Initial login flag and an empty user id.
     self.isLogin, self.uid = 0, ""
Exemple #5
0
 def process_item(self, item, spider):
     """Append the item as one JSON line to ``weather<YYYYMMDD>.json``.

     Args:
         item: the scraped item; converted with ``dict(item)`` before dumping.
         spider: the calling spider (unused).

     Returns:
         The unchanged ``item``.
     """
     logger = MyLog()
     stamp = time.strftime('%Y%m%d', time.localtime())
     target = ''.join(['weather', stamp, '.json'])
     # NOTE(review): start/end messages are logged at error/warn level;
     # kept as in the original -- confirm whether info was intended.
     logger.error('转换json开始')
     with codecs.open(target, 'a', encoding='utf8') as out:
         # ensure_ascii=False keeps non-ASCII characters unescaped.
         serialized = json.dumps(dict(item), ensure_ascii=False)
         out.write(serialized + '\n')
     logger.warn('转换json结束')
     return item
Exemple #6
0
 def process_item(self, item, spider):
     """Append the item to ``weather<YYYYMMDD>.txt`` and fetch its image.

     One tab-separated record (city name, weather, image file name,
     humidity, air quality) is appended to the text file. If no file with
     the image's base name exists in the current directory, the image is
     downloaded from ``item['img']`` and saved under that name.

     Args:
         item: mapping with the keys ``cityName``, ``weather``, ``img``,
             ``shidu`` and ``air``.
         spider: the calling spider (unused).

     Returns:
         The unchanged ``item``.
     """
     m1 = MyLog()
     today = time.strftime('%Y%m%d', time.localtime())
     fileName = 'weather' + today + '.txt'
     m1.info('同步开始')
     with open(fileName, 'a') as fp:
         fp.write(item['cityName'].encode('utf-8') + '\t')
         fp.write(item['weather'].encode('utf-8') + '\t')
         imgName = os.path.basename(item['img'])
         fp.write(imgName + '\t')
         if not os.path.exists(imgName):
             # Read the whole image first so a failed download does not
             # leave an empty image file behind, and always close the
             # HTTP response.
             response = urllib2.urlopen(item['img'])
             try:
                 imgData = response.read()
             finally:
                 response.close()
             # Use a separate name for the image handle: rebinding ``fp``
             # here would make the writes below hit a closed file.
             with open(imgName, 'wb') as imgFp:
                 imgFp.write(imgData)
         fp.write(item['shidu'].encode('utf-8') + '\t')
         fp.write(item['air'].encode('utf-8') + '\n\n')
         time.sleep(1)
     m1.info('同步结束')
     return item
Exemple #7
0
 def __init__(self):
     """Create the logger, then run the four time demos in order."""
     self.log = MyLog()
     # Each method is looked up right before it is called, matching the
     # original statement-by-statement sequence.
     for demo in ('testTime', 'testLocaltime', 'testSleep', 'testStrftime'):
         getattr(self, demo)()
Exemple #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from myLog import MyLog

if __name__ == '__main__':
    # Emit one message at each log level, in this order.
    ml = MyLog()
    samples = (
        ('debug', "I am the debug message"),
        ('info', "I am the info message"),
        ('warn', "I am the warn message"),
        ('error', "I am the error message"),
        ('critical', "I am the critical message"),
    )
    for level, text in samples:
        getattr(ml, level)(text)
Exemple #9
0
# coding:utf-8

from myLog import MyLog

# Module-level logger used by testMylog() below.
mylog = MyLog()


def testMylog():
    """Trigger a ZeroDivisionError and log it through the module logger."""
    try:
        1 / 0
    except ZeroDivisionError as exc:
        mylog.error("def testMylog " + str(exc))


# Run the logging demo only when this file is executed as a script.
if __name__ == '__main__':
    testMylog()
Exemple #10
0
def testLog():
    """Log one debug message and one error message through a new MyLog."""
    logger = MyLog()
    for level, text in (('debug', 'it is debug'), ('error', "I'm error")):
        getattr(logger, level)(text)
Exemple #11
0
def delete_proxy(self, proxy):
    """Ask the proxy-pool service on 127.0.0.1:5010 to delete *proxy*.

    Args:
        proxy: value substituted into the ``proxy`` query parameter.
    """
    # The original body was garbled by duplicated text fragments and did not
    # parse; this is the single GET request it spelled out.
    requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))


def get_html(self, url):
    """Fetch *url* through a proxy and return the response text.

    The request is tried up to five times with the same proxy. If every
    attempt fails, the proxy is deleted from the pool and the fetch is
    retried from scratch with a new proxy.

    NOTE(review): the original text was garbled (``url(url``, ``return rn``,
    ``get_pro_proxy``, ``delete_pro_proxy``, ``get_htm_html``). The sibling
    method names ``get_proxy``/``delete_proxy`` are reconstructed -- confirm
    against the enclosing class.
    NOTE(review): the retry recursion has no bound; it only ends once some
    proxy succeeds.

    Args:
        url: address to fetch.

    Returns:
        The body text of the first successful response.
    """
    retry_count = 5
    proxy = self.get_proxy()
    print(proxy)
    while retry_count > 0:
        try:
            # Route the request through the proxy.
            html = requests.get(url, proxies={"http": "http://{}".format(proxy)})
            return html.text
        except Exception:
            retry_count -= 1
    # Five failures: drop this proxy from the pool and start over.
    self.delete_proxy(proxy)
    # Return the retry's result so the caller receives the page text.
    return self.get_html(url)
# Module-level logger shared by get_html() and get_content() below.
logger = MyLog()
def get_html(url):
    """Download *url* and return its text.

    The response encoding is set from ``apparent_encoding`` before the text
    is read.

    Args:
        url: address of the page to download.

    Returns:
        The page text, or the sentinel string ``" ERROR when get html"`` when
        the request fails or returns an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are handled here; KeyboardInterrupt and
        # programming errors are no longer swallowed by a bare except.
        logger.error('get_html出错页面为: ' + url)
        return " ERROR when get html"
def get_content(url):
    """Fetch a forum post page and print its forum name, publish time and topic.

    Each field is parsed independently. When a field cannot be found or
    parsed, an error is logged and that field is set to ``None``.

    Args:
        url: address of the post page.
    """
    print("当前爬取的网页为"+url)
    soup = BeautifulSoup(get_html(url),'lxml')

    # A missing tag makes find() return None, so the attribute access raises
    # AttributeError; that is the failure each handler below covers.
    try:
        forum_name = soup.find('p',attrs={'class':"nrbt"}).a.text
    except AttributeError:
        logger.error("get_forum_name出错"+"页面为"+url)
        forum_name = None
    else:
        print("当前爬取的论坛名为:"+forum_name)

    try:
        # The first four characters of the text are skipped before parsing.
        raw_time = soup.find('p',attrs={'class':"fbsj"}).text[4:]
        publish_time = datetime.strptime(raw_time,'%Y-%m-%d %H:%M')
    except (AttributeError, ValueError):
        logger.error("get_publish_time出错" + "页面为" + url)
        publish_time = None
    else:
        # str() is required: concatenating str with a datetime raised
        # TypeError, which used to discard every successfully parsed time.
        print("这个帖子的发表时间为:"+str(publish_time))

    try:
        topic = soup.find('div',attrs={'class':"nr_r_c"}).find('p',attrs={'class':"contitle"}).text
    except AttributeError:
        logger.error("get_topic出错" + "页面为" + url)
        topic = None
    else:
        # The parsed topic is kept; it was previously reset to None right
        # after being printed.
        print("帖子的主题为:"+topic)

    # TODO: re-implement extraction of the post bodies. The disabled draft
    # walked every <div class="neirong">, collecting child text and
    # "[<img src>]" markers into this list. Until then the list is empty;
    # it is initialized so the print below no longer raises NameError.
    all_neirong = []

    print(all_neirong)









# Module-level run: fetches and prints one post page whenever this file is executed or imported.
# Alternative sample page, currently disabled:
# get_content("http://bbs.12365auto.com/postcontent.aspx?tID=47547&sId=1527&ppage=1&from=s")
get_content("http://bbs.12365auto.com/postcontent.aspx?tID=133692&sId=1147&ppage=1&from=s")