def __init__(self, confFile):
    """Initialize the aggregator from a configuration file.

    Args:
        confFile: path to the configuration file forwarded to the base class.
    """
    super(StandardPartAgg, self).__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
    # Cursor into the prefix sequence; processing starts at the first prefix.
    self.prefixIndex = 0
def __init__(self, json_config_file):
    """Initialize the SAT Air crawler manager.

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.001 interval and no extra arg).
    """
    # Page-range bounds stay unset until configured elsewhere.
    self.__startPageNum = None
    self.__endPageNum = None
    super(SatAirCrawlerManager, self).__init__(json_config_file, 0.001, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    """Initialize the enterprise-list crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with PARA_CONTINENT_CODE, PARA_COUNTRY_CODE and
            PARA_ORG_ID entries (all required).
    """
    super(EnterpriseListCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    # Location / organisation parameters identifying what to crawl.
    self.__continentCode = request[EnterpriseListCrawler.PARA_CONTINENT_CODE]
    self.__countryCode = request[EnterpriseListCrawler.PARA_COUNTRY_CODE]
    self.__orgId = request[EnterpriseListCrawler.PARA_ORG_ID]
def __init__(self, json_config_file):
    """Initialize the NSN cage-catalog crawler manager.

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.1 interval and no extra arg).
    """
    # Cage prefixes still to be crawled; populated later.
    self.__cagePrefixList = []
    super(NSNCageCatalogCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, json_config_file):
    """Initialize the AviAll crawler manager.

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.001 interval and no extra arg).
    """
    # Image-only crawling is disabled by default and the image save
    # location is empty until configured.
    self.__imgOnly = False
    self.__imgSavePath = ''
    super(AviAllCrawlerManager, self).__init__(json_config_file, 0.001, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    """Initialize the part crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with PARA_ENTERPRISE_ID, PARA_LICENCE_ID and
            PARA_ENTERPRISE_ONLY entries; PARA_SPECIFIC_STARTNO is optional.
    """
    super(PartCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.__enterpriseId = request[PartCrawler.PARA_ENTERPRISE_ID]
    self.__licenceId = request[PartCrawler.PARA_LICENCE_ID]
    self.__enterpriseOnly = request[PartCrawler.PARA_ENTERPRISE_ONLY]
    # Explicit starting number is optional; None means "no specific start".
    if PartCrawler.PARA_SPECIFIC_STARTNO in request:
        self.__startNo = request[PartCrawler.PARA_SPECIFIC_STARTNO]
    else:
        self.__startNo = None
def __init__(self, json_config_file):
    """Initialize the NSN cage crawler manager.

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.1 interval and no extra arg).
    """
    # Per-digit position into the cage-number space.  The first slot starts
    # at -1 — presumably so the first increment lands on position 0;
    # NOTE(review): confirm against the increment logic.
    self.__cageNumIndex = [-1, 0, 0, 0, 0]
    self.__noMore = False
    self.__cageNumList = []
    super(NSNCageCrawlerManager, self).__init__(json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    """Initialize the AviAll crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with required PARA_URL and PARA_PAGE_TYPE entries;
            PARA_IMG_ONLY and PARA_IMG_SAVE_PATH are optional (image-only
            crawling is off by default, save path defaults to '').
    """
    super(AviAllCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.__url = request[AviAllCrawler.PARA_URL]
    self.__pageType = request[AviAllCrawler.PARA_PAGE_TYPE]
    # PARA_IMG_ONLY was already treated as optional, but the save path was
    # read with unguarded indexing, which raised KeyError for requests that
    # did not carry it.  Default both, mirroring AviAllCrawlerManager's
    # defaults (imgOnly=False, imgSavePath='').
    self.__imgOnly = request.get(AviAllCrawler.PARA_IMG_ONLY, False)
    self.__imgSavePath = request.get(AviAllCrawler.PARA_IMG_SAVE_PATH, '')
def __init__(self, json_config_file):
    """Initialize the NSN cage-file crawler manager.

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.1 interval and no extra arg).
    """
    # Five-digit position into the cage-number space, all zeros initially.
    self.__cageNumIndex = [0, 0, 0, 0, 0]
    self.__noMore = False
    # Optional crawl range and output directory; configured later.
    self.__startCageNum = None
    self.__endCageNum = None
    self.__parentPath = None
    super(NSNCageFileCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, json_config_file):
    """Initialize the enterprise-list crawler manager.

    Crawls data for the ``enterprise`` table::

        CREATE TABLE `enterprise` (
            `ENTERPRISE_ID` varchar(63) NOT NULL DEFAULT '',
            `ENTERPRISE_NAME` varchar(255) DEFAULT NULL,
            `licence_id` varchar(63) DEFAULT NULL,
            `COUNTRY_CODE` varchar(10) DEFAULT NULL,
            `ORGID` varchar(63) DEFAULT NULL,
            `certificate_no` varchar(63) DEFAULT NULL,
            `EXPIRED_DATE` date DEFAULT NULL,
            `address` varchar(1023) DEFAULT NULL,
            `scan_copy_link` varchar(1023) DEFAULT NULL,
            PRIMARY KEY (`ENTERPRISE_ID`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.1 interval and no extra arg).
    """
    super(EnterpriseListCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, json_config_file):
    """Initialize the part crawler manager.

    Crawls data for the ``part`` table::

        CREATE TABLE `part` (
            `enterprise_id` varchar(63) DEFAULT NULL,
            `licence_id` varchar(63) DEFAULT NULL,
            `aircraft_part_id` varchar(63) DEFAULT NULL,
            `ata_chapter_section` varchar(63) DEFAULT NULL,
            `category_no` varchar(63) DEFAULT NULL,
            `parts_number` varchar(63) DEFAULT NULL,
            `parts_name` varchar(255) DEFAULT NULL,
            `manufacturers` varchar(63) DEFAULT NULL,
            `inspection` char(1) DEFAULT '0',
            `repair` char(1) DEFAULT '0',
            `modification` char(1) DEFAULT '0',
            `overhaul` char(1) DEFAULT '0',
            `file_to_accord` varchar(255) DEFAULT NULL,
            `main_devices` varchar(255) DEFAULT NULL,
            `remark` text
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8

    Args:
        json_config_file: path to the JSON configuration file forwarded to
            the base class (together with a 0.1 interval and no extra arg).
    """
    # By default both enterprise data and part data are crawled.
    self.__enterpriseOnly = False
    super(PartCrawlerManager, self).__init__(json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, confFile):
    """Initialize the NSN cleaner from a configuration file.

    Args:
        confFile: path to the configuration file forwarded to the base class.
    """
    super(NSNCleaner, self).__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
def __init__(self, confFile):
    """Initialize the AirBus part cleaner from a configuration file.

    Args:
        confFile: path to the configuration file forwarded to the base class.
    """
    super(AirBusPart, self).__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
VALUES ("%s", "%s", "%s","%s", "%s", "%s", %d) ''' % (bsn, msn, partNum, spn, partName.replace( '"', '\\"'), cageCode, ataCode) insertCount += 1 #execute the sql if necessary if sql is not None: self.dstDbProxy.execute(sql) sql = 'update part_clean set clean_flag=1 where tid in (%s)' % ','.join( tidSet) self.srcDbProxy.execute(sql) self.dstDbProxy.commit() self.srcDbProxy.commit() #self.processFinish = True if finishFlag: self.processFinish = True self.logger.info( 'Totally processed %d. Insert:%d, Update:%d, Existing:%d', self.currentNo, insertCount, updateCount, exitingCount) if __name__ == '__main__': pid = os.getpid() PIDUtils.writePid(LOGGER_NAME_CLENER, pid) Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg')) ins = AirBusPart(os.path.join('conf', LOGGER_NAME_CLENER + '.cfg')) ins.start() pidutils = PIDUtils(LOGGER_NAME_CLENER, ins.shutDown, 5, ins.logger) pidutils.start() sys.exit(0)
def __init__(self, controller, dbProxy, request):
    """Initialize the NSN cage-file crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with PARA_CAGE_FILE_PATH and PARA_CAGE_NUM entries
            (both required).
    """
    super(NSNCageFileCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    # Path of the cage-number file and the cage number this task handles.
    self.__cageNumFilePath = request[NSNCageFileCrawler.PARA_CAGE_FILE_PATH]
    self.__cageNum = request[NSNCageFileCrawler.PARA_CAGE_NUM]
sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join( vendorList) self.dbProxy.execute(sql) self.dbProxy.commit() class Vendor(object): def __init__(self, vendorCode, vendorName): self.vendorCode = vendorCode self.vendorName = vendorName self.address = '' self.isDummy = 0 if __name__ == '__main__': import platform if 'window' in platform.system().lower(): Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg')) else: Logging.initLogger(os.path.join('conf', 'crawler.logging.cfg')) conf = { CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost', CrawlerConstants.CONFIG_FILE_DBPORT: 3306, CrawlerConstants.CONFIG_FILE_DBUSER: '******', CrawlerConstants.CONFIG_FILE_DBPASS: '******', CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus' } parser = AirBusVendorParser(conf, Logging.getLogger(LOGGER_NAME)) fileName = 'F:\\tmp\\vendor.txt' parser.parse(fileName)
def __init__(self, controller, dbProxy, request):
    """Initialize the SAT Air crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with PARA_URL and PARA_PAGE_TYPE entries
            (both required).
    """
    super(SatAirCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    # Target URL and the kind of page it is expected to contain.
    self.__url = request[SatAirCrawler.PARA_URL]
    self.__pageType = request[SatAirCrawler.PARA_PAGE_TYPE]
cageName = tds[2].text.strip() self.logger.debug('CageNum:%s, CageName:%s', cageNum, cageName) self.totalNum += 1 nextPageDisabled = soup.findAll('li', {'class': 'next disabled'}) if len(nextPageDisabled) > 0: return CrawlerConstants.VAL_STATUS_FINISH else: nextPage = soup.findAll('li', {'class': 'next'}) if len(nextPage) > 0: return CrawlerConstants.VAL_STATUS_MORE else: return CrawlerConstants.VAL_STATUS_FINISH if __name__ == '__main__': ''' if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL): print 'Previous process is on-going, please stop it firstly' sys.exit(1) ''' pid = os.getpid() PIDUtils.writePid(LOGGER_NAME_CRAWL, pid) Logging.initLogger('conf/crawler.logging.cfg') #Logging.initLogger('F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.cfg') ins = NSNCageCatalogCrawlerManager('conf/' + LOGGER_NAME_CRAWL + '.cfg') #ins = NSNCageCrawlerManager('F:\\program\\crm\\crawler\\src\\python\\conf\\nsn.cfg') ins.start() pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger) pidutils.start() sys.exit(0)
def __init__(self, confFile):
    """Initialize the duplicate aggregator from a configuration file.

    Args:
        confFile: path to the configuration file forwarded to the base class.
    """
    super(DuplicateAgg, self).__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
def __init__(self, controller, dbProxy, request):
    """Initialize the NSN cage-catalog crawler for a single request.

    Args:
        controller: owning crawler controller.
        dbProxy: database proxy used by the base class.
        request: mapping with a required PARA_CAGE_PREFIX entry.
    """
    super(NSNCageCatalogCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    # Cage prefix assigned to this crawl task.
    self.__cagePrefix = request[NSNCageCatalogCrawler.PARA_CAGE_PREFIX]
if len(ref) == 13: previousFullWithoutHyphen = ref else: ref = previousFullWithoutHyphen[:13 - len(ref)] + ref ref = ref[:4] + '-' + ref[4:6] + '-' + ref[6:9] + '-' + ref[9:] if ref == nsnNum: continue referenceList.append(ref) return referenceList if __name__ == '__main__': ''' if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL): print 'Previous process is on-going, please stop it firstly' sys.exit(1) ''' pid = os.getpid() PIDUtils.writePid(LOGGER_NAME_CRAWL, pid) #Logging.initLogger('conf/crawler.logging.cfg') Logging.initLogger( 'F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.win.cfg' ) #ins = NSNCageFileCrawlerManager('conf/'+LOGGER_NAME_CRAWL+'.cfg') ins = NSNCageFileCrawlerManager( 'F:\\program\\crm\\crawler\\src\\python\\conf\\nfc.cfg') ins.start() pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger) pidutils.start() sys.exit(0)