Esempio n. 1
0
 def __init__(self, confFile):
     '''
     Initialise the instance from ``confFile``.

     The configuration path is handed unchanged to the base-class
     constructor. Afterwards the cleaner logger is attached and the
     prefix index is set to zero.
     '''
     base = super(StandardPartAgg, self)
     base.__init__(confFile)
     cleanerLogger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleanerLogger
     self.prefixIndex = 0
Esempio n. 2
0
 def __init__(self, json_config_file):
     '''
     Initialise the manager from ``json_config_file``.

     Both page-range attributes are reset to ``None`` before the base
     constructor is invoked; the crawl logger is attached afterwards.
     '''
     # Keep the reset ahead of the base constructor call: the ordering is
     # preserved from the original implementation.
     self.__startPageNum = self.__endPageNum = None
     parent = super(SatAirCrawlerManager, self)
     # NOTE(review): 0.001 and None are positional arguments whose meaning
     # is defined by the base class, which is not visible here.
     parent.__init__(json_config_file, 0.001, None)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 3
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler for a single request.

     ``controller``, ``dbProxy`` and ``request`` are forwarded to the base
     constructor. The continent code, country code and organisation id are
     then read from ``request``; a missing key propagates the lookup error.
     '''
     super(EnterpriseListCrawler, self).__init__(
         controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Short alias for the class that owns the request-key constants.
     keys = EnterpriseListCrawler
     self.__continentCode = request[keys.PARA_CONTINENT_CODE]
     self.__countryCode = request[keys.PARA_COUNTRY_CODE]
     self.__orgId = request[keys.PARA_ORG_ID]
Esempio n. 4
0
 def __init__(self, json_config_file):
     '''
     Initialise the manager from ``json_config_file``.

     Starts with an empty cage-prefix list, delegates to the base
     constructor and finally attaches the crawl logger.
     '''
     # Reset before the base constructor runs, as in the original ordering.
     self.__cagePrefixList = []
     parent = super(NSNCageCatalogCrawlerManager, self)
     # NOTE(review): 0.1 and None are positional arguments interpreted by
     # the base class, which is not visible here.
     parent.__init__(json_config_file, 0.1, None)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 5
0
 def __init__(self, json_config_file):
     '''
     Initialise the manager from ``json_config_file``.

     The image-only flag starts as ``False`` and the image save path as an
     empty string; both are set before the base constructor is called.
     The crawl logger is attached last.
     '''
     self.__imgOnly, self.__imgSavePath = False, ''
     parent = super(AviAllCrawlerManager, self)
     # NOTE(review): 0.001 and None are positional arguments interpreted by
     # the base class, which is not visible here.
     parent.__init__(json_config_file, 0.001, None)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 6
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler for a single request.

     Required request entries: enterprise id, licence id and the
     enterprise-only flag. The specific start number is optional and
     is ``None`` when the request does not carry it.
     '''
     super(PartCrawler, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Short alias for the class that owns the request-key constants.
     keys = PartCrawler
     self.__enterpriseId = request[keys.PARA_ENTERPRISE_ID]
     self.__licenceId = request[keys.PARA_LICENCE_ID]
     startKey = keys.PARA_SPECIFIC_STARTNO
     self.__startNo = request[startKey] if startKey in request else None
     self.__enterpriseOnly = request[keys.PARA_ENTERPRISE_ONLY]
Esempio n. 7
0
 def __init__(self, json_config_file):
     '''
     Initialise the manager from ``json_config_file``.

     The five-slot cage-number index starts at ``[-1, 0, 0, 0, 0]``, the
     no-more flag is cleared and the cage-number list is empty. All three
     are set before the base constructor is called.
     '''
     self.__cageNumIndex = [-1] + [0] * 4
     self.__noMore = False
     self.__cageNumList = []
     parent = super(NSNCageCrawlerManager, self)
     # NOTE(review): 0.1 and None are positional arguments interpreted by
     # the base class, which is not visible here.
     parent.__init__(json_config_file, 0.1, None)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 8
0
    def __init__(self, controller, dbProxy, request):
        '''
        Initialise the crawler for a single request.

        The URL, page type and image save path are required request
        entries. The image-only flag is optional and defaults to ``False``
        when the request does not carry it.
        '''
        super(AviAllCrawler, self).__init__(controller, dbProxy, request)
        self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
        # Short alias for the class that owns the request-key constants.
        keys = AviAllCrawler
        self.__url = request[keys.PARA_URL]
        self.__pageType = request[keys.PARA_PAGE_TYPE]
        imgOnlyKey = keys.PARA_IMG_ONLY
        self.__imgOnly = (request[imgOnlyKey]
                          if imgOnlyKey in request else False)
        self.__imgSavePath = request[keys.PARA_IMG_SAVE_PATH]
Esempio n. 9
0
 def __init__(self, json_config_file):
     '''
     Initialise the manager from ``json_config_file``.

     The five-slot cage-number index starts at all zeros, the no-more flag
     is cleared, and the start/end cage numbers and parent path are
     ``None``. All are set before the base constructor is called.
     '''
     self.__cageNumIndex = [0] * 5
     self.__noMore = False
     self.__startCageNum = self.__endCageNum = self.__parentPath = None
     parent = super(NSNCageFileCrawlerManager, self)
     # NOTE(review): 0.1 and None are positional arguments interpreted by
     # the base class, which is not visible here.
     parent.__init__(json_config_file, 0.1, None)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 10
0
    def __init__(self, json_config_file):
        '''
        Initialise the manager that crawls for enterprise records.

        ``json_config_file`` is forwarded to the base constructor and the
        crawl logger is attached afterwards.

        Reference schema carried over from the original notes:

            CREATE TABLE `enterprise` (
              `ENTERPRISE_ID` varchar(63) NOT NULL DEFAULT '',
              `ENTERPRISE_NAME` varchar(255) DEFAULT NULL,
              `licence_id` varchar(63) DEFAULT NULL,
              `COUNTRY_CODE` varchar(10) DEFAULT NULL,
              `ORGID` varchar(63) DEFAULT NULL,
              `certificate_no` varchar(63) DEFAULT NULL,
              `EXPIRED_DATE` date DEFAULT NULL,
              `address` varchar(1023) DEFAULT NULL,
              `scan_copy_link` varchar(1023) DEFAULT NULL,
              PRIMARY KEY (`ENTERPRISE_ID`)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8
        '''
        parent = super(EnterpriseListCrawlerManager, self)
        # NOTE(review): 0.1 and None are positional arguments interpreted
        # by the base class, which is not visible here.
        parent.__init__(json_config_file, 0.1, None)
        self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 11
0
    def __init__(self, json_config_file):
        '''
        Initialise the manager from ``json_config_file``.

        The enterprise-only flag is cleared before the base constructor is
        called; the crawl logger is attached afterwards.

        Reference schema carried over from the original notes:

            CREATE TABLE `part` (
              `enterprise_id` varchar(63) DEFAULT NULL,
              `licence_id` varchar(63) DEFAULT NULL,
              `aircraft_part_id` varchar(63) DEFAULT NULL,
              `ata_chapter_section` varchar(63) DEFAULT NULL,
              `category_no` varchar(63) DEFAULT NULL,
              `parts_number` varchar(63) DEFAULT NULL,
              `parts_name` varchar(255) DEFAULT NULL,
              `manufacturers` varchar(63) DEFAULT NULL,
              `inspection` char(1) DEFAULT '0',
              `repair` char(1) DEFAULT '0',
              `modification` char(1) DEFAULT '0',
              `overhaul` char(1) DEFAULT '0',
              `file_to_accord` varchar(255) DEFAULT NULL,
              `main_devices` varchar(255) DEFAULT NULL,
              `remark` text
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8
        '''
        # Cleared ahead of the base constructor, as in the original order.
        self.__enterpriseOnly = False
        parent = super(PartCrawlerManager, self)
        # NOTE(review): 0.1 and None are positional arguments interpreted
        # by the base class, which is not visible here.
        parent.__init__(json_config_file, 0.1, None)
        self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
Esempio n. 12
0
 def __init__(self, confFile):
     '''
     Initialise the instance from ``confFile``.

     The configuration path is forwarded unchanged to the base constructor
     and the cleaner logger is attached afterwards.
     '''
     base = super(NSNCleaner, self)
     base.__init__(confFile)
     cleanerLogger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleanerLogger
Esempio n. 13
0
 def __init__(self, confFile):
     '''
     Initialise the instance from ``confFile``.

     The configuration path is forwarded unchanged to the base constructor
     and the cleaner logger is attached afterwards.
     '''
     base = super(AirBusPart, self)
     base.__init__(confFile)
     cleanerLogger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleanerLogger
Esempio n. 14
0
        VALUES ("%s", "%s", "%s","%s", "%s", "%s", %d)
        ''' % (bsn, msn, partNum, spn, partName.replace(
                    '"', '\\"'), cageCode, ataCode)
                insertCount += 1

            #execute the sql if necessary
            if sql is not None:
                self.dstDbProxy.execute(sql)

        sql = 'update part_clean set clean_flag=1 where tid in (%s)' % ','.join(
            tidSet)
        self.srcDbProxy.execute(sql)
        self.dstDbProxy.commit()
        self.srcDbProxy.commit()
        #self.processFinish = True
        if finishFlag:
            self.processFinish = True
        self.logger.info(
            'Totally processed %d. Insert:%d, Update:%d, Existing:%d',
            self.currentNo, insertCount, updateCount, exitingCount)


if __name__ == '__main__':
    # Script entry point: record this process's PID, initialise logging,
    # start an AirBusPart instance, start the PID helper, then exit with
    # status 0.
    pid = os.getpid()
    PIDUtils.writePid(LOGGER_NAME_CLENER, pid)
    # NOTE(review): the '.win' logging config is loaded unconditionally,
    # regardless of the platform the script runs on - confirm intended.
    Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg'))
    # The instance config file name is derived from the logger name.
    ins = AirBusPart(os.path.join('conf', LOGGER_NAME_CLENER + '.cfg'))
    ins.start()
    # PIDUtils is given ins.shutDown as a callback; presumably it calls it
    # when a stop is requested, with 5 as an interval - TODO confirm.
    pidutils = PIDUtils(LOGGER_NAME_CLENER, ins.shutDown, 5, ins.logger)
    pidutils.start()
    sys.exit(0)
Esempio n. 15
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler for a single request.

     The cage-number file path and the cage number are both required
     request entries; a missing key propagates the lookup error.
     '''
     super(NSNCageFileCrawler, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Short alias for the class that owns the request-key constants.
     keys = NSNCageFileCrawler
     self.__cageNumFilePath = request[keys.PARA_CAGE_FILE_PATH]
     self.__cageNum = request[keys.PARA_CAGE_NUM]
Esempio n. 16
0
        sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join(
            vendorList)

        self.dbProxy.execute(sql)
        self.dbProxy.commit()


class Vendor(object):
    '''
    Simple value holder for one vendor.

    Attributes:
        vendorCode: code supplied by the caller.
        vendorName: name supplied by the caller.
        address: starts as an empty string.
        isDummy: starts as 0.
    '''

    def __init__(self, vendorCode, vendorName):
        # Caller-supplied values first, then the fixed defaults.
        self.vendorCode, self.vendorName = vendorCode, vendorName
        self.address, self.isDummy = '', 0


if __name__ == '__main__':
    # Script entry point: pick a logging config by platform, build a
    # database config dict and parse one vendor file.
    import platform
    # Any platform name containing 'window' selects the '.win' logging
    # config; everything else uses the default one.
    if 'window' in platform.system().lower():
        Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg'))
    else:
        Logging.initLogger(os.path.join('conf', 'crawler.logging.cfg'))
    # NOTE(review): connection settings are hard-coded here; the user and
    # password values appear as masked placeholders in this source.
    conf = {
        CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost',
        CrawlerConstants.CONFIG_FILE_DBPORT: 3306,
        CrawlerConstants.CONFIG_FILE_DBUSER: '******',
        CrawlerConstants.CONFIG_FILE_DBPASS: '******',
        CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus'
    }
    parser = AirBusVendorParser(conf, Logging.getLogger(LOGGER_NAME))
    # NOTE(review): absolute Windows path to the input file is hard-coded.
    fileName = 'F:\\tmp\\vendor.txt'
    parser.parse(fileName)
Esempio n. 17
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler for a single request.

     The URL and page type are both required request entries; a missing
     key propagates the lookup error.
     '''
     super(SatAirCrawler, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Short alias for the class that owns the request-key constants.
     keys = SatAirCrawler
     self.__url = request[keys.PARA_URL]
     self.__pageType = request[keys.PARA_PAGE_TYPE]
Esempio n. 18
0
            cageName = tds[2].text.strip()
            self.logger.debug('CageNum:%s, CageName:%s', cageNum, cageName)
            self.totalNum += 1

        nextPageDisabled = soup.findAll('li', {'class': 'next disabled'})
        if len(nextPageDisabled) > 0:
            return CrawlerConstants.VAL_STATUS_FINISH
        else:
            nextPage = soup.findAll('li', {'class': 'next'})
            if len(nextPage) > 0:
                return CrawlerConstants.VAL_STATUS_MORE
            else:
                return CrawlerConstants.VAL_STATUS_FINISH


if __name__ == '__main__':
    # Script entry point: record this process's PID, initialise logging,
    # start an NSNCageCatalogCrawlerManager, start the PID helper, then
    # exit with status 0.
    #
    # The string literal below is a disabled "already running" guard; as a
    # bare string expression it has no effect at runtime.
    '''
    if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL):
        print 'Previous process is on-going, please stop it firstly'
        sys.exit(1)
    '''
    pid = os.getpid()
    PIDUtils.writePid(LOGGER_NAME_CRAWL, pid)
    Logging.initLogger('conf/crawler.logging.cfg')
    # Disabled alternative: absolute Windows path to the logging config.
    #Logging.initLogger('F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.cfg')
    # The manager config file name is derived from the logger name.
    ins = NSNCageCatalogCrawlerManager('conf/' + LOGGER_NAME_CRAWL + '.cfg')
    # Disabled alternative: a different manager class with an absolute path.
    #ins = NSNCageCrawlerManager('F:\\program\\crm\\crawler\\src\\python\\conf\\nsn.cfg')
    ins.start()
    # PIDUtils is given ins.shutDown as a callback; presumably it calls it
    # when a stop is requested, with 5 as an interval - TODO confirm.
    pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger)
    pidutils.start()
    sys.exit(0)
Esempio n. 19
0
 def __init__(self, confFile):
     '''
     Initialise the instance from ``confFile``.

     The configuration path is forwarded unchanged to the base constructor
     and the cleaner logger is attached afterwards.
     '''
     base = super(DuplicateAgg, self)
     base.__init__(confFile)
     cleanerLogger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleanerLogger
Esempio n. 20
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler for a single request.

     The cage prefix is a required request entry; a missing key
     propagates the lookup error.
     '''
     base = super(NSNCageCatalogCrawler, self)
     base.__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     prefixKey = NSNCageCatalogCrawler.PARA_CAGE_PREFIX
     self.__cagePrefix = request[prefixKey]
Esempio n. 21
0
            if len(ref) == 13:
                previousFullWithoutHyphen = ref
            else:
                ref = previousFullWithoutHyphen[:13 - len(ref)] + ref
            ref = ref[:4] + '-' + ref[4:6] + '-' + ref[6:9] + '-' + ref[9:]
            if ref == nsnNum:
                continue
            referenceList.append(ref)

        return referenceList


if __name__ == '__main__':
    # Script entry point: record this process's PID, initialise logging,
    # start an NSNCageFileCrawlerManager, start the PID helper, then exit
    # with status 0.
    #
    # The string literal below is a disabled "already running" guard; as a
    # bare string expression it has no effect at runtime.
    '''
    if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL):
        print 'Previous process is on-going, please stop it firstly'
        sys.exit(1)
    '''
    pid = os.getpid()
    PIDUtils.writePid(LOGGER_NAME_CRAWL, pid)
    # Disabled alternative: relative path to the default logging config.
    #Logging.initLogger('conf/crawler.logging.cfg')
    # NOTE(review): absolute, machine-specific Windows path is hard-coded
    # for the logging config - this only works on that one machine layout.
    Logging.initLogger(
        'F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.win.cfg'
    )
    # Disabled alternative: config path derived from the logger name.
    #ins = NSNCageFileCrawlerManager('conf/'+LOGGER_NAME_CRAWL+'.cfg')
    # NOTE(review): absolute, machine-specific Windows path is hard-coded
    # for the manager config as well.
    ins = NSNCageFileCrawlerManager(
        'F:\\program\\crm\\crawler\\src\\python\\conf\\nfc.cfg')
    ins.start()
    # PIDUtils is given ins.shutDown as a callback; presumably it calls it
    # when a stop is requested, with 5 as an interval - TODO confirm.
    pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger)
    pidutils.start()
    sys.exit(0)