def __init__(self, name=None, **kwargs):
        """Initialise the spider and seed its start URLs from the whitelist CSV.

        After the base ``XMLFeedSpider`` initialiser has run, every non-blank
        row of ``whitelist_urls_test.csv`` (resolved against the current
        working directory) contributes its second comma-separated field as a
        feed URL: the URL is appended to ``self.start_urls`` and registered
        with an empty dict in ``self.visited_urls``.  Finally ``"Started"`` is
        written to ``self.logger_file``.

        Args:
            name: Forwarded unchanged to ``XMLFeedSpider.__init__``.
            **kwargs: Forwarded unchanged to ``XMLFeedSpider.__init__``.

        Raises:
            IOError: If the whitelist file cannot be opened (``OSError`` on
                Python 3).
            IndexError: If a non-blank row has no second comma-separated
                field.
        """
        XMLFeedSpider.__init__(self, name=name, **kwargs)
        # NOTE(review): ``start_urls``, ``visited_urls`` and ``logger_file``
        # are not created in this method; presumably they are class attributes
        # or set up elsewhere -- confirm that ``start_urls`` is not a list
        # shared between instances, since it is mutated in place below.
        # The context manager guarantees the file handle is closed even when
        # a malformed row raises.
        with open('whitelist_urls_test.csv') as whitelist_file:
            for line in whitelist_file:
                # A blank line (e.g. a trailing newline at end of file) has no
                # fields at all; skip it instead of failing with IndexError.
                if not line.strip():
                    continue
                # Strip so that a URL in the last column does not keep the
                # line terminator as part of the address.
                feed_url = line.split(',')[1].strip()
                self.start_urls.append(feed_url)
                self.visited_urls[feed_url] = {}

        self.logger_file.write("Started")
Beispiel #2
0
    def __init__(self, *arg, **argdict):
        """Set default attribute values, apply ``argdict`` and init the base spider.

        The instance's attribute listing is logged first.  All configuration
        attributes are then reset to their defaults, ``self.initConfig`` is
        called with the keyword-argument dict, and finally the
        ``XMLFeedSpider`` initialiser runs with the positional arguments only
        (the keyword arguments are not forwarded to it).

        Args:
            *arg: Positional arguments forwarded to ``XMLFeedSpider.__init__``.
            **argdict: Keyword arguments handed to ``self.initConfig`` as a
                single dict (presumably overriding the defaults below --
                confirm against ``initConfig``).
        """
        attribute_dump = '---------------------%s--------------' % dir(self)
        logging.info(attribute_dump)

        # Defaults, in (attribute name, initial value) pairs.
        defaults = (
            ('titleXpath', ''),
            ('descriptionXpath', ''),
            ('descriptionLenght', 0),
            ('linkXpath', ''),
            ('imgUrlXpath', ''),
            ('imageNum', 1),
            ('videoUrlXpath', ''),
            ('pubDateXpath', ''),
            ('guidXpath', ''),
            ('rule_id', ''),
            ('is_remove_namespaces', False),
        )
        for attr_name, initial_value in defaults:
            setattr(self, attr_name, initial_value)

        # Configuration is applied before the base-class initialiser runs.
        self.initConfig(argdict)
        XMLFeedSpider.__init__(self, *arg)
        self.currentNode = None
Beispiel #3
0
    def __init__(self, *arg, **argdict):
        """Reset configuration attributes, apply ``argdict``, then init the base class.

        Steps, in order: log ``dir(self)``; assign default values to the
        configuration attributes; call ``self.initConfig`` with the
        keyword-argument dict; call ``XMLFeedSpider.__init__`` with the
        positional arguments only; set ``self.currentNode`` to ``None``.

        Args:
            *arg: Positional arguments passed on to ``XMLFeedSpider.__init__``.
            **argdict: Keyword arguments, passed as one dict to
                ``self.initConfig`` and not forwarded to the base class.
        """
        message = '---------------------%s--------------' % dir(self)
        logging.info(message)

        # String-valued settings all start out empty.
        self.titleXpath = self.descriptionXpath = ''
        self.linkXpath = self.imgUrlXpath = ''
        self.videoUrlXpath = self.pubDateXpath = self.guidXpath = ''
        self.rule_id = ''

        # Numeric and boolean settings.
        self.descriptionLenght = 0
        self.imageNum = 1
        self.is_remove_namespaces = False

        # initConfig runs before the base initialiser, as in the original order.
        self.initConfig(argdict)
        XMLFeedSpider.__init__(self, *arg)
        self.currentNode = None
Beispiel #4
0
 def __init__(self, *arg, **argdict):
     """Apply the spider configuration, then initialise the base spider.

     Args:
         *arg: Positional arguments, passed to both ``self.initConfig`` and
             ``XMLFeedSpider.__init__``.
         **argdict: Keyword arguments, passed to both ``self.initConfig``
             and ``XMLFeedSpider.__init__``.
     """
     self.initConfig(*arg, **argdict)
     # Calling the initialiser through the class does not bind the instance,
     # so ``self`` must be passed explicitly.  Without it the first positional
     # argument would be treated as the instance (or a TypeError raised when
     # there are no positional arguments).
     XMLFeedSpider.__init__(self, *arg, **argdict)
 def __init__(self, vin, *args, **kwargs):
     """Initialise the base spider and remember the ``vin`` argument.

     Args:
         vin: Value stored on the instance as ``self.vin``.
         *args: Extra positional arguments forwarded to
             ``XMLFeedSpider.__init__``.
         **kwargs: Extra keyword arguments forwarded to
             ``XMLFeedSpider.__init__``.
     """
     # ``self.name`` is read before the base initialiser runs, so it is
     # presumably a class-level attribute -- confirm.
     logging.info("Spider " + self.name + " is calling")
     # Forward the extra arguments in a single base-class call.  Chaining a
     # second ``.__init__`` onto the first call's return value (``None``)
     # never delivered them to the spider and raised TypeError whenever any
     # were supplied.
     XMLFeedSpider.__init__(self, *args, **kwargs)
     self.vin = vin